#Set working directory to appropriate folder for inputs and outputs on Google Drive
#Initialize
rm(list = ls())
library(dplyr)
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
library(Seurat)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attaching SeuratObject
Attaching sp
library(ggplot2)
library(RColorBrewer)
library(ggpubr)
library(pheatmap)
library(viridis)
Loading required package: viridisLite
library(xlsx)
`%nin%` = Negate(`%in%`)
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Assign_dominant_barcodes/all_data_final_lineages.RData')
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Preprocess_GEX/second_timepoint_merged.RData')
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Filtering_cDNA/resistant_lineage_lists.RData')
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Assign_dominant_barcodes/cis_final_lineages.RData')
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Assign_dominant_barcodes/cocl2_final_lineages.RData')
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Assign_dominant_barcodes/dabtram_final_lineages.RData')
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Assign_dominant_barcodes/dabtram_both_times_final_lineages.RData')
#find lineages that are maintained at both dabtram timepoints
fivecell_cDNA$DabTramMaintained <- Reduce(intersect, list(fivecell_cDNA$DabTram, fivecell_cDNA$DabTramtoDabTram))
filtered_meta <- rep(0, length(names(all_data$Lineage)))
#specify which cells are in lineages that pass filtering for that condition
filtered_meta[which(all_data$OG_condition == "dabtram" & all_data$Lineage %in% combined_lins_list$DabTram)] <- 'Resistant to DabTram'
filtered_meta[which(all_data$OG_condition == "dabtramtodabtram" & all_data$Lineage %in% combined_lins_list$DabTramtoDabTram)] <- 'Resistant to DabTramtoDabTram'
filtered_meta[which(all_data$OG_condition == "dabtramtococl2" & all_data$Lineage %in% combined_lins_list$DabTramtoCoCl2)] <- 'Resistant to DabTramtoCoCl2'
filtered_meta[which(all_data$OG_condition == "dabtramtocis" & all_data$Lineage %in% combined_lins_list$DabTramtoCis)] <- 'Resistant to DabTramtoCis'
filtered_meta[which(all_data$OG_condition == "cocl2" & all_data$Lineage %in% combined_lins_list$CoCl2)] <- 'Resistant to CoCl2'
filtered_meta[which(all_data$OG_condition == "cocl2todabtram" & all_data$Lineage %in% combined_lins_list$CoCl2toDabTram)] <- 'Resistant to CoCl2toDabTram'
filtered_meta[which(all_data$OG_condition == "cocl2tococl2" & all_data$Lineage %in% combined_lins_list$CoCl2toCoCl2)] <- 'Resistant to CoCl2toCoCl2'
filtered_meta[which(all_data$OG_condition == "cocl2tocis" & all_data$Lineage %in% combined_lins_list$CoCl2toCis)] <- 'Resistant to CoCl2toCis'
filtered_meta[which(all_data$OG_condition == "cis" & all_data$Lineage %in% combined_lins_list$Cis)] <- 'Resistant to Cis'
filtered_meta[which(all_data$OG_condition == "cistodabtram" & all_data$Lineage %in% combined_lins_list$CistoDabTram)] <- 'Resistant to CistoDabTram'
filtered_meta[which(all_data$OG_condition == "cistococl2" & all_data$Lineage %in% combined_lins_list$CistoCoCl2)] <- 'Resistant to CistoCoCl2'
filtered_meta[which(all_data$OG_condition == "cistocis" & all_data$Lineage %in% combined_lins_list$CistoCis)] <- 'Resistant to CistoCis'
#specify which cells are in lineages of more than 5 cells
filtered_meta[which(all_data$OG_condition == "dabtram" & all_data$Lineage %in% fivecell_cDNA$DabTram)] <- 'Large Resistant to DabTram'
filtered_meta[which(all_data$OG_condition == "dabtramtodabtram" & all_data$Lineage %in% fivecell_cDNA$DabTramtoDabTram)] <- 'Large Resistant to DabTramtoDabTram'
filtered_meta[which(all_data$OG_condition == "dabtramtococl2" & all_data$Lineage %in% fivecell_cDNA$DabTramtoCoCl2)] <- 'Large Resistant to DabTramtoCoCl2'
filtered_meta[which(all_data$OG_condition == "dabtramtocis" & all_data$Lineage %in% fivecell_cDNA$DabTramtoCis)] <- 'Large Resistant to DabTramtoCis'
filtered_meta[which(all_data$OG_condition == "cocl2" & all_data$Lineage %in% fivecell_cDNA$CoCl2)] <- 'Large Resistant to CoCl2'
filtered_meta[which(all_data$OG_condition == "cocl2todabtram" & all_data$Lineage %in% fivecell_cDNA$CoCl2toDabTram)] <- 'Large Resistant to CoCl2toDabTram'
filtered_meta[which(all_data$OG_condition == "cocl2tococl2" & all_data$Lineage %in% fivecell_cDNA$CoCl2toCoCl2)] <- 'Large Resistant to CoCl2toCoCl2'
filtered_meta[which(all_data$OG_condition == "cocl2tocis" & all_data$Lineage %in% fivecell_cDNA$CoCl2toCis)] <- 'Large Resistant to CoCl2toCis'
filtered_meta[which(all_data$OG_condition == "cis" & all_data$Lineage %in% fivecell_cDNA$Cis)] <- 'Large Resistant to Cis'
filtered_meta[which(all_data$OG_condition == "cistodabtram" & all_data$Lineage %in% fivecell_cDNA$CistoDabTram)] <- 'Large Resistant to CistoDabTram'
filtered_meta[which(all_data$OG_condition == "cistococl2" & all_data$Lineage %in% fivecell_cDNA$CistoCoCl2)] <- 'Large Resistant to CistoCoCl2'
filtered_meta[which(all_data$OG_condition == "cistocis" & all_data$Lineage %in% fivecell_cDNA$CistoCis)] <- 'Large Resistant to CistoCis'
# filtered_meta[which(all_data$OG_condition == "dabtram" & all_data$Lineage %in% fivecell_cDNA$DabTramMaintained)] <- 'Maintained Resistant to DabTram'
# filtered_meta[which(all_data$OG_condition == "dabtramtodabtram" & all_data$Lineage %in% fivecell_cDNA$DabTramMaintained)] <- 'Maintained Resistant to DabTramtoDabTram'
#specify which cells are in lineages that did not pass filtering
filtered_meta[which(all_data$OG_condition == "dabtram" & all_data$Lineage %nin% combined_lins_list$DabTram & all_data$Lineage %nin% c("No Barcode", "Still multiple"))] <- 'Filtered out'
filtered_meta[which(all_data$OG_condition == "dabtramtodabtram" & all_data$Lineage %nin% combined_lins_list$DabTramtoDabTram & all_data$Lineage %nin% c("No Barcode", "Still multiple"))] <- 'Filtered out'
filtered_meta[which(all_data$OG_condition == "dabtramtococl2" & all_data$Lineage %nin% combined_lins_list$DabTramtoCoCl2 & all_data$Lineage %nin% c("No Barcode", "Still multiple"))] <- 'Filtered out'
filtered_meta[which(all_data$OG_condition == "dabtramtocis" & all_data$Lineage %nin% combined_lins_list$DabTramtoCis & all_data$Lineage %nin% c("No Barcode", "Still multiple"))] <- 'Filtered out'
filtered_meta[which(all_data$OG_condition == "cocl2" & all_data$Lineage %nin% combined_lins_list$CoCl2 & all_data$Lineage %nin% c("No Barcode", "Still multiple"))] <- 'Filtered out'
filtered_meta[which(all_data$OG_condition == "cocl2todabtram" & all_data$Lineage %nin% combined_lins_list$CoCl2toDabTram & all_data$Lineage %nin% c("No Barcode", "Still multiple"))] <- 'Filtered out'
filtered_meta[which(all_data$OG_condition == "cocl2tococl2" & all_data$Lineage %nin% combined_lins_list$CoCl2toCoCl2 & all_data$Lineage %nin% c("No Barcode", "Still multiple"))] <- 'Filtered out'
filtered_meta[which(all_data$OG_condition == "cocl2tocis" & all_data$Lineage %nin% combined_lins_list$CoCl2toCis & all_data$Lineage %nin% c("No Barcode", "Still multiple"))] <- 'Filtered out'
filtered_meta[which(all_data$OG_condition == "cis" & all_data$Lineage %nin% combined_lins_list$Cis & all_data$Lineage %nin% c("No Barcode", "Still multiple"))] <- 'Filtered out'
filtered_meta[which(all_data$OG_condition == "cistodabtram" & all_data$Lineage %nin% combined_lins_list$CistoDabTram & all_data$Lineage %nin% c("No Barcode", "Still multiple"))] <- 'Filtered out'
filtered_meta[which(all_data$OG_condition == "cistococl2" & all_data$Lineage %nin% combined_lins_list$CistoCoCl2 & all_data$Lineage %nin% c("No Barcode", "Still multiple"))] <- 'Filtered out'
filtered_meta[which(all_data$OG_condition == "cistocis" & all_data$Lineage %nin% combined_lins_list$CistoCis & all_data$Lineage %nin% c("No Barcode", "Still multiple"))] <- 'Filtered out'
#specify which cells had zero or multiple barcodes
filtered_meta[which(all_data$Lineage %in% c("No Barcode", "Still multiple"))] <- 'No Barcode'
print(table(filtered_meta))
filtered_meta
Filtered out Large Resistant to Cis
3337 1375
Large Resistant to CistoCis Large Resistant to CistoCoCl2
951 2078
Large Resistant to CistoDabTram Large Resistant to CoCl2
1394 1784
Large Resistant to CoCl2toCis Large Resistant to CoCl2toCoCl2
3010 11578
Large Resistant to CoCl2toDabTram Large Resistant to DabTram
663 478
Large Resistant to DabTramtoCis Large Resistant to DabTramtoCoCl2
4234 2840
Large Resistant to DabTramtoDabTram No Barcode
3176 35314
Resistant to Cis Resistant to CistoCis
331 67
Resistant to CistoCoCl2 Resistant to CistoDabTram
135 113
Resistant to CoCl2 Resistant to CoCl2toCis
278 157
Resistant to CoCl2toCoCl2 Resistant to CoCl2toDabTram
55 93
Resistant to DabTram Resistant to DabTramtoCis
337 225
Resistant to DabTramtoCoCl2 Resistant to DabTramtoDabTram
100 222
# Find the mean of the average pearson correlation per lineage
mean_pearson_dabtram <- mean(unlist(lapply(dabtram_lin_pearson_list, mean))) # True mean of average correlations per lineage
means_pearson_dabtram_sim <- sapply(1:length(dabtram_lin_pearson_rand_list), function (y)
mean(unlist(lapply(dabtram_lin_pearson_rand_list[[y]], mean)))) # list of mean of average correlations per lineage
z_mean_pearson_dabtram <- (mean_pearson_dabtram-mean(means_pearson_dabtram_sim))/sd(means_pearson_dabtram_sim) # Z score comparing mean to simulations
pval_mean_pearson_dabtram <- pnorm(z_mean_pearson_dabtram, mean(means_pearson_dabtram_sim), sd(means_pearson_dabtram_sim), lower.tail = F) # calculate p value from z score
# Find the weighted means of the average pearson correlations per lineage
weighted_mean_pearson_dabtram <- weighted.mean(unlist(lapply(dabtram_lin_pearson_list, mean)),
unlist(lapply(dabtram_lin_pearson_list, length))) # true weighted mean of average correlations per lineage
weighted_means_pearson_dabtram_sim <- sapply(1:length(dabtram_lin_pearson_rand_list), function(y)
weighted.mean(unlist(lapply(dabtram_lin_pearson_rand_list[[y]], mean)),
unlist(lapply(dabtram_lin_pearson_rand_list[[y]], length)))) # List of weighted means of pearson correlations
z_wmean_pearson_dabtram <- (weighted_mean_pearson_dabtram-mean(weighted_means_pearson_dabtram_sim))/sd(weighted_means_pearson_dabtram_sim) # Z score comparing mean to simulations
pval_wmean_pearson_dabtram <- pnorm(z_wmean_pearson_dabtram, mean(weighted_means_pearson_dabtram_sim), sd(weighted_means_pearson_dabtram_sim), lower.tail = F) # calculate p value from z score
# Compare each individual distribution of pearson correlations to the observed correlation by wilcoxon rank sum test and track pval
wilcox_pval_dabtram <- c()
for (i in 1:length(dabtram_lin_pearson_rand_list)){
sim_means <- unlist(lapply(dabtram_lin_pearson_rand_list[[i]], mean))
wilcox_pval_dabtram <- cbind(wilcox_pval_dabtram, wilcox.test(x = unlist(lapply(dabtram_lin_pearson_list, mean)),
y = sim_means, alternative = 'greater')$p.value)
}
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(dabtram_lin_pearson_list, :
cannot compute exact p-value with ties
# Save outputs
save(dabtram, dabtram_lin_pearson_list, dabtram_lin_pearson_rand_list, z_mean_pearson_dabtram, pval_mean_pearson_dabtram, z_wmean_pearson_dabtram, pval_wmean_pearson_dabtram, wilcox_pval_dabtram, file = '2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/dabtram_pearson_sim_results.RData')
rm(dabtram,dabtram_lin_pearson_list, dabtram_lin_pearson_rand_list, dabtram_input_data)
Idents(all_data) <- all_data$OG_condition # Change the idents to the OG condition for subsetting to dabtramtodabtram
dabtramtodabtram <- subset(all_data, idents = 'dabtramtodabtram') # Subset down to the dabtramtodabtram object
dabtramtodabtram <- NormalizeData(dabtramtodabtram)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
dabtramtodabtram <- FindVariableFeatures(dabtramtodabtram, selection.method = 'vst', nFeatures = 20000)
Warning: The following arguments are not used: nFeatures
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
dabtramtodabtram <- ScaleData(dabtramtodabtram)
Centering and scaling data matrix
|
| | 0%
|
|============================================== | 50%
|
|===========================================================================================| 100%
dabtramtodabtram <- RunPCA(dabtramtodabtram)
PC_ 1
Positive: AEBP1, SORBS2, PSAP, PMP22, CALD1, CRYAB, CTSK, CEBPD, MALAT1, NUPR1
PLP1, NEAT1, DEPP1, FAM89A, SOX4, GAS7, SPARC, MBP, MLANA, PHACTR1
FBXO32, GPM6B, TXNIP, SOX10, S100B, CBLB, PDZRN3, ID4, AKAP12, PMEL
Negative: MKI67, CENPF, TPX2, STMN1, RRM2, UBE2C, PCLAF, TOP2A, ANLN, UBE2S
BIRC5, H2AFZ, TK1, CKS1B, GTSE1, PRC1, NUSAP1, HIST1H4C, UBE2T, HIST1H1B
DTYMK, ASPM, TUBB, TMPO, TUBA1B, CEP55, HIST1H1D, FOSL1, ZWINT, RANBP1
PC_ 2
Positive: VCAM1, COL14A1, IGFBP5, C1R, APOE, TXNRD1, COL1A1, COL6A1, CTSD, PDE5A
ADM, SLC7A11, IL6ST, SQSTM1, RND3, IRF1, HLA-B, LINC00968, DCN, LAMB1
PLAAT4, NQO1, LUM, BOC, CTSC, NFE2L2, CARD16, WNT5A, MGST1, LBP
Negative: CRYAB, RPL26, S100B, FABP5, MIF, MT2A, PLP1, S100A1, GAS7, PHACTR1
SH3BGRL3, MLANA, H2AFZ, SOX10, LIMA1, C3orf14, S100A10, CHPF, H2AFJ, C12orf75
TAGLN2, RNASEK, COL9A3, CSTB, GPM6B, ITGA6, NGFR, VGF, RPL22L1, UBE2C
PC_ 3
Positive: TMSB4X, C12orf75, CAV1, ARL4C, CPA4, CEMIP, RPS27L, DKK1, S100A6, CTHRC1
SERPINE1, DSTN, S100A11, RPL26, SFRP1, AC092807.3, DAB2, TIMP3, CSTB, RPL22L1
LMO7, S100A16, TNIK, RNASEK, SMYD3, SRGN, SPOCK1, CCL2, RPL12, LINC01638
Negative: GAS7, CRYAB, PLP1, COL9A3, SOX10, AKAP12, PHACTR1, NGFR, PMP22, MLANA
S100B, TXNIP, SOX6, ITGA6, CTSD, PMEPA1, EPB41L3, FXYD3, GPM6B, PMP2
NFATC2, EPB41L2, TSC22D1, EMP1, AL139383.1, TRIM2, TFAP2A, S100A1, CITED1, MGP
PC_ 4
Positive: MALAT1, HSP90B1, FN1, MT-ND1, MT-ATP6, SERPINE2, MT-ND2, MT-ND4, TIMP3, SPARC
MFGE8, NEAT1, CALR, THBS1, SPOCK1, LMO7, HSPA5, GLS, IGFBP7, LIMA1
ANXA2, HSPA8, EIF4A1, PRNP, SORBS2, MYH9, PHLDB2, COL5A2, PRSS23, MT-ND6
Negative: FTH1, FTL, MIF, TXN, RPL12, RPL26, MGST1, S100A6, CSTB, GYPC
ATOX1, RPL34, TOMM5, ATP5MC1, PTGR1, RPS27L, CENPX, RNASEK, ITGAE, H2AFZ
KYNU, SOST, CHCHD10, GAPDH, CYTL1, AKR1B10, AKR1C3, BOC, JPT1, MMD
PC_ 5
Positive: SH3BGRL3, S100B, S100A10, GAS7, ATOX1, CENPX, COL9A3, GYPC, CLSPN, PHLDA2
HSPG2, IFI27, NGFR, ATP5MC1, CRYAB, E2F1, CALM2, PMP2, S100A11, CALD1
DTL, COTL1, HELLS, ATAD2, CCNE2, DKK1, S100A6, EPB41L3, SOX6, ITGA2
Negative: CTSK, CCNB1, PMEL, PSAP, ASPM, CENPE, HMMR, PLK1, MT-ATP6, AURKA
DLGAP5, FBXO32, CTSL, CENPF, LGALS3, NEK2, CCNB2, IGFN1, CDC20, APOE
ARL6IP1, KIF14, TPX2, CENPA, DEPDC1, MT-ND2, PTTG1, TSPAN10, CKAP2, SLC12A8
ElbowPlot(dabtramtodabtram) # The standard deviation seems to really level off at 10
# Recluster with the appropriate number of dimensions
dabtramtodabtram <- FindNeighbors(dabtramtodabtram, dims = 1:15)
Computing nearest neighbor graph
Computing SNN
dabtramtodabtram <- FindClusters(dabtramtodabtram, resolution = 0.5)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 6906
Number of edges: 231004
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8325
Number of communities: 10
Elapsed time: 0 seconds
dabtramtodabtram <- RunUMAP(dabtramtodabtram, dims = 1:15)
10:43:18 UMAP embedding parameters a = 0.9922 b = 1.112
10:43:18 Read 6906 rows and found 15 numeric columns
10:43:18 Using Annoy for neighbor search, n_neighbors = 30
10:43:18 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
10:43:18 Writing NN index file to temp file /var/folders/ph/24prrxys02179y9_qzhxjgvc0000gn/T//Rtmpx16qOy/filec5f898b3add
10:43:18 Searching Annoy index using 1 thread, search_k = 3000
10:43:20 Annoy recall = 100%
10:43:20 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
10:43:21 Initializing from normalized Laplacian + noise (using irlba)
10:43:21 Commencing optimization for 500 epochs, with 286942 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
10:43:30 Optimization finished
DimPlot(dabtramtodabtram, reduction = 'umap', pt.size = 1)
# Get the scaled data from the dabtramtodabtram object
dabtramtodabtram_input_data <- GetAssayData(dabtramtodabtram, assay = 'RNA', slot = 'scale.data')
# Build list of lineages with at least 5 cells in dab tram and do pearson correlation
dabtramtodabtram_lin_pearson_list <- list()
for (i in fivecell_cDNA$DabTramtoDabTram){
temp_pearson <- cor(dabtramtodabtram_input_data[,colnames(dabtramtodabtram)[dabtramtodabtram$Lineage == i]])
temp_pearson_filt <- temp_pearson[lower.tri(temp_pearson, diag = FALSE)]
dabtramtodabtram_lin_pearson_list[[i]] <- temp_pearson_filt
}
# Need to do a random sampling of the same thing
dabtramtodabtram_lin_pearson_rand_list <- list()
num_iter <- 100
for(j in 1:num_iter){
dabtramtodabtram_lin_pearson_rand_list[[j]] <- list()
for (i in fivecell_cDNA$DabTramtoDabTram){
set.seed(j)
num_cells <- length(dabtramtodabtram$Lineage[dabtramtodabtram$Lineage == i])
temp_pearson <- cor(dabtramtodabtram_input_data[,sample(colnames(dabtramtodabtram), num_cells, replace = F)])
temp_pearson_filt <- temp_pearson[lower.tri(temp_pearson, diag = FALSE)]
dabtramtodabtram_lin_pearson_rand_list[[j]][[i]] <- temp_pearson_filt
}
}
# Find the mean of the average pearson correlation per lineage
mean_pearson_dabtramtodabtram <- mean(unlist(lapply(dabtramtodabtram_lin_pearson_list, mean))) # True mean of average correlations per lineage
means_pearson_dabtramtodabtram_sim <- sapply(1:length(dabtramtodabtram_lin_pearson_rand_list), function (y)
mean(unlist(lapply(dabtramtodabtram_lin_pearson_rand_list[[y]], mean)))) # list of mean of average correlations per lineage
z_mean_pearson_dabtramtodabtram <- (mean_pearson_dabtramtodabtram-mean(means_pearson_dabtramtodabtram_sim))/sd(means_pearson_dabtramtodabtram_sim) # Z score comparing mean to simulations
pval_mean_pearson_dabtramtodabtram <- pnorm(z_mean_pearson_dabtramtodabtram, mean(means_pearson_dabtramtodabtram_sim), sd(means_pearson_dabtramtodabtram_sim), lower.tail = F) # calculate p value from z score
# Find the weighted means of the average pearson correlations per lineage
weighted_mean_pearson_dabtramtodabtram <- weighted.mean(unlist(lapply(dabtramtodabtram_lin_pearson_list, mean)),
unlist(lapply(dabtramtodabtram_lin_pearson_list, length))) # true weighted mean of average correlations per lineage
weighted_means_pearson_dabtramtodabtram_sim <- sapply(1:length(dabtramtodabtram_lin_pearson_rand_list), function(y)
weighted.mean(unlist(lapply(dabtramtodabtram_lin_pearson_rand_list[[y]], mean)),
unlist(lapply(dabtramtodabtram_lin_pearson_rand_list[[y]], length)))) # List of weighted means of pearson correlations
z_wmean_pearson_dabtramtodabtram <- (weighted_mean_pearson_dabtramtodabtram-mean(weighted_means_pearson_dabtramtodabtram_sim))/sd(weighted_means_pearson_dabtramtodabtram_sim) # Z score comparing mean to simulations
pval_wmean_pearson_dabtramtodabtram <- pnorm(z_wmean_pearson_dabtramtodabtram, mean(weighted_means_pearson_dabtramtodabtram_sim), sd(weighted_means_pearson_dabtramtodabtram_sim), lower.tail = F) # calculate p value from z score
# Compare each individual distribution of pearson correlations to the observed correlation by wilcoxon rank sum test and track pval
wilcox_pval_dabtramtodabtram <- c()
for (i in 1:length(dabtramtodabtram_lin_pearson_rand_list)){
sim_means <- unlist(lapply(dabtramtodabtram_lin_pearson_rand_list[[i]], mean))
wilcox_pval_dabtramtodabtram <- cbind(wilcox_pval_dabtramtodabtram, wilcox.test(x = unlist(lapply(dabtramtodabtram_lin_pearson_list, mean)),
y = sim_means, alternative = 'greater')$p.value)
}
# Save outputs
save(dabtramtodabtram, dabtramtodabtram_lin_pearson_list, dabtramtodabtram_lin_pearson_rand_list, z_mean_pearson_dabtramtodabtram, pval_mean_pearson_dabtramtodabtram, z_wmean_pearson_dabtramtodabtram, pval_wmean_pearson_dabtramtodabtram, wilcox_pval_dabtramtodabtram, file = '2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/dabtramtodabtram_pearson_sim_results.RData')
rm(dabtramtodabtram, dabtramtodabtram_lin_pearson_list, dabtramtodabtram_lin_pearson_rand_list, dabtramtodabtram_input_data)
Idents(all_data) <- all_data$OG_condition # Change the idents to the OG condition for subsetting to dabtramtocis
dabtramtocis <- subset(all_data, idents = 'dabtramtocis') # Subset down to the dabtramtocis object
dabtramtocis <- NormalizeData(dabtramtocis)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
dabtramtocis <- FindVariableFeatures(dabtramtocis, selection.method = 'vst', nFeatures = 20000)
Warning: The following arguments are not used: nFeatures
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
dabtramtocis <- ScaleData(dabtramtocis)
Centering and scaling data matrix
|
| | 0%
|
|============================================== | 50%
|
|===========================================================================================| 100%
dabtramtocis <- RunPCA(dabtramtocis)
PC_ 1
Positive: HNRNPAB, MKI67, TUBA1B, TPM3, PRKDC, HMGB1, MT-ATP6, CFL1, ACTB, CENPF
YWHAB, CYCS, MT-CO3, H2AFZ, TPX2, SFPQ, CALU, PARP1, UBE2S, FASN
PFN1, TMPO, ASPM, CALR, MCM4, YWHAZ, ATAD2, RANBP1, STMN1, MYH10
Negative: GAS5, SAT1, MALAT1, SNHG7, EPB41L4A-AS1, TAF1D, GAPDH, HSPA1A, WSB1, NDRG1
BNIP3, SNHG12, HSPA1B, NRN1, DDIT4, SLC2A3, NT5C3A, CLK1, MYC, RGS1
RGS2, CREBRF, ZNF581, BBC3, POU3F1, ANKRD37, SH3BP4, RIPK4, APOD, HIST3H2A
PC_ 2
Positive: MT-CO3, MT-CO2, MT-ATP6, MT-CYB, MT-ND3, MT-ND2, MT-ND1, MT-ND4, MARCKSL1, ARPC1B
CD63, LY6E, S100B, CCND1, FTH1, NOLC1, NDUFAF8, NDUFC2, HIPK2, PRKDC
FABP5, CFL1, NDUFB2, FASN, S100A11, METRN, SRM, PARP1, WWTR1, ATOX1
Negative: MKI67, CENPF, TOP2A, ASPM, TPX2, UBE2C, NUSAP1, HMGB2, PRC1, GTSE1
CENPE, PLK1, CCNB1, HMMR, KIF14, NUF2, KIF2C, AURKA, ARL6IP1, CENPA
DLGAP5, DEPDC1, SGO2, NEK2, ANLN, CEP55, CCNA2, CDCA8, BIRC5, PRR11
PC_ 3
Positive: GAPDH, ACTG1, BNIP3, RPS8, FTL, SCD, EMP1, PGK1, DDIT4, RPL12
TPI1, MYC, GAS5, CSTB, SH3BP4, RPS6, TIMP3, NT5C3A, BHLHE40, SNHG7
PKM, UPP1, LGALS1, FTH1, NRN1, CTSD, TMSB10, NDRG1, LY6E, GYPC
Negative: MT-ATP6, MT-CO3, HELLS, ATAD2, MT-ND3, CLSPN, XRCC2, C21orf58, HIST1H1B, HIST1H1D
BRCA1, FAM111A, MT-CYB, MT-CO2, BRCA2, HMGA2-AS1, MALAT1, RRM2, FANCA, NEAT1
POLQ, PCNA, HIST2H2AC, OR2AT4, MCM4, POLD3, ORC6, AC058791.1, ATAD5, AUXG01000058.1
PC_ 4
Positive: MT-ATP6, MT-CO3, MT-CYB, MT-ND3, MT-CO2, CCNB1, CENPE, CDC20, PLK1, PTTG1
JPT1, UBE2S, CDKN3, CCNB2, CD63, DYNLL1, HMMR, BIRC5, PTMS, AURKA
KIF14, ARL6IP1, CENPA, PRDX1, DLGAP5, PRR11, TPX2, MT-ND2, NEK2, ASPM
Negative: MCM4, HIST1H1B, ATAD2, CLSPN, RRM2, HELLS, MCM3, PCNA, HIST1H1D, PCLAF
DTL, GAPDH, BRCA1, POLD3, HIST1H4C, MALAT1, CENPU, HIST2H2AC, BHLHE40, BNIP3
FAM111A, E2F1, DDIT4, MCM7, CCNE2, MCM5, NDRG1, ENO2, ADM, CHAF1A
PC_ 5
Positive: PRDX1, PSMA7, S100A6, TMSB10, ATOX1, FTL, MT2A, HSP90AA1, H2AFZ, CD63
FTH1, CHCHD2, CSTB, LGALS1, DBI, GSTO1, PHLDA2, SH3BGRL3, HSPE1, S100A11
ATP5MC3, CALM1, FABP5, POMP, HMGB1, NUDT1, ACTB, TMSB4X, VGF, NME1
Negative: NEAT1, MALAT1, GABPB1-AS1, BTG1, MT-ND2, MT-ND1, PDE3B, HPS4, SPTBN1, MT-ND6
MT-ND3, PIK3R3, MACF1, SAT1, NHLRC3, MT-CO2, MT-ND4, SNHG14, ZFP36L1, HIPK2
VMP1, CPM, ZNF106, FN1, MT-CYB, MT-CO3, AKAP12, SOX4, MYO10, MIR3142HG
ElbowPlot(dabtramtocis) # The standard deviation seems to really level off at 10
# Recluster with the appropriate number of dimensions
dabtramtocis <- FindNeighbors(dabtramtocis, dims = 1:15)
Computing nearest neighbor graph
Computing SNN
dabtramtocis <- FindClusters(dabtramtocis, resolution = 0.5)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 7070
Number of edges: 253207
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8557
Number of communities: 9
Elapsed time: 0 seconds
dabtramtocis <- RunUMAP(dabtramtocis, dims = 1:15)
10:46:18 UMAP embedding parameters a = 0.9922 b = 1.112
10:46:18 Read 7070 rows and found 15 numeric columns
10:46:18 Using Annoy for neighbor search, n_neighbors = 30
10:46:18 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
10:46:19 Writing NN index file to temp file /var/folders/ph/24prrxys02179y9_qzhxjgvc0000gn/T//Rtmpx16qOy/filec5f813c58810
10:46:19 Searching Annoy index using 1 thread, search_k = 3000
10:46:21 Annoy recall = 100%
10:46:21 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
10:46:22 Initializing from normalized Laplacian + noise (using irlba)
10:46:22 Commencing optimization for 500 epochs, with 300562 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
10:46:31 Optimization finished
DimPlot(dabtramtocis, reduction = 'umap', pt.size = 1)
# Get the scaled data from the dabtramtocis object
dabtramtocis_input_data <- GetAssayData(dabtramtocis, assay = 'RNA', slot = 'scale.data')
# Build list of lineages with at least 5 cells in dab tram and do pearson correlation
dabtramtocis_lin_pearson_list <- list()
for (i in fivecell_cDNA$DabTramtoCis){
temp_pearson <- cor(dabtramtocis_input_data[,colnames(dabtramtocis)[dabtramtocis$Lineage == i]])
temp_pearson_filt <- temp_pearson[lower.tri(temp_pearson, diag = FALSE)]
dabtramtocis_lin_pearson_list[[i]] <- temp_pearson_filt
}
# Need to do a random sampling of the same thing
dabtramtocis_lin_pearson_rand_list <- list()
num_iter <- 100
for(j in 1:num_iter){
dabtramtocis_lin_pearson_rand_list[[j]] <- list()
for (i in fivecell_cDNA$DabTramtoCis){
set.seed(j)
num_cells <- length(dabtramtocis$Lineage[dabtramtocis$Lineage == i])
temp_pearson <- cor(dabtramtocis_input_data[,sample(colnames(dabtramtocis), num_cells, replace = F)])
temp_pearson_filt <- temp_pearson[lower.tri(temp_pearson, diag = FALSE)]
dabtramtocis_lin_pearson_rand_list[[j]][[i]] <- temp_pearson_filt
}
}
# Find the mean of the average pearson correlation per lineage
mean_pearson_dabtramtocis <- mean(unlist(lapply(dabtramtocis_lin_pearson_list, mean))) # True mean of average correlations per lineage
means_pearson_dabtramtocis_sim <- sapply(1:length(dabtramtocis_lin_pearson_rand_list), function (y)
mean(unlist(lapply(dabtramtocis_lin_pearson_rand_list[[y]], mean)))) # list of mean of average correlations per lineage
z_mean_pearson_dabtramtocis <- (mean_pearson_dabtramtocis-mean(means_pearson_dabtramtocis_sim))/sd(means_pearson_dabtramtocis_sim) # Z score comparing mean to simulations
pval_mean_pearson_dabtramtocis <- pnorm(z_mean_pearson_dabtramtocis, mean(means_pearson_dabtramtocis_sim), sd(means_pearson_dabtramtocis_sim), lower.tail = F) # calculate p value from z score
# Find the weighted means of the average pearson correlations per lineage
weighted_mean_pearson_dabtramtocis <- weighted.mean(unlist(lapply(dabtramtocis_lin_pearson_list, mean)),
unlist(lapply(dabtramtocis_lin_pearson_list, length))) # true weighted mean of average correlations per lineage
weighted_means_pearson_dabtramtocis_sim <- sapply(1:length(dabtramtocis_lin_pearson_rand_list), function(y)
weighted.mean(unlist(lapply(dabtramtocis_lin_pearson_rand_list[[y]], mean)),
unlist(lapply(dabtramtocis_lin_pearson_rand_list[[y]], length)))) # List of weighted means of pearson correlations
z_wmean_pearson_dabtramtocis <- (weighted_mean_pearson_dabtramtocis-mean(weighted_means_pearson_dabtramtocis_sim))/sd(weighted_means_pearson_dabtramtocis_sim) # Z score comparing mean to simulations
pval_wmean_pearson_dabtramtocis <- pnorm(z_wmean_pearson_dabtramtocis, mean(weighted_means_pearson_dabtramtocis_sim), sd(weighted_means_pearson_dabtramtocis_sim), lower.tail = F) # calculate p value from z score
# Compare each individual distribution of pearson correlations to the observed correlation by wilcoxon rank sum test and track pval
wilcox_pval_dabtramtocis <- c()
for (i in 1:length(dabtramtocis_lin_pearson_rand_list)){
sim_means <- unlist(lapply(dabtramtocis_lin_pearson_rand_list[[i]], mean))
wilcox_pval_dabtramtocis <- cbind(wilcox_pval_dabtramtocis, wilcox.test(x = unlist(lapply(dabtramtocis_lin_pearson_list, mean)),
y = sim_means, alternative = 'greater')$p.value)
}
# Save outputs
save(dabtramtocis, dabtramtocis_lin_pearson_list, dabtramtocis_lin_pearson_rand_list, z_mean_pearson_dabtramtocis, pval_mean_pearson_dabtramtocis, z_wmean_pearson_dabtramtocis, pval_wmean_pearson_dabtramtocis, wilcox_pval_dabtramtocis, file = '2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/dabtramtocis_pearson_sim_results.RData')
rm(dabtramtocis, dabtramtocis_lin_pearson_list, dabtramtocis_lin_pearson_rand_list, dabtramtocis_input_data)
Idents(all_data) <- all_data$OG_condition # Change the idents to the OG condition for subsetting to dabtram
dabtramtococl2 <- subset(all_data, idents = 'dabtramtococl2') # Subset down to the dabtram object
dabtramtococl2 <- NormalizeData(dabtramtococl2)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
dabtramtococl2 <- FindVariableFeatures(dabtramtococl2, selection.method = 'vst', nFeatures = 20000)
Warning: The following arguments are not used: nFeatures
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
dabtramtococl2 <- ScaleData(dabtramtococl2)
Centering and scaling data matrix
|
| | 0%
|
|============================================== | 50%
|
|===========================================================================================| 100%
dabtramtococl2 <- RunPCA(dabtramtococl2)
PC_ 1
Positive: HIST1H4C, H2AFZ, TUBB, UBE2S, DTYMK, TUBA1B, GTSE1, UBE2C, C12orf75, CKS1B
TAGLN2, BIRC5, HMGB1, STMN1, TK1, TPX2, NME2, RANBP1, NUDT1, RPS5
JPT1, GYPC, ACTG1, EEF1D, TUBA1C, PRELID1, FOSL1, PHLDA2, CCDC85B, PTTG1
Negative: NEAT1, AC058791.1, MMP1, HMGA2-AS1, TTN, IGFN1, A2M, SNHG14, CBLB, S100B
CDH19, MMP3, SLC7A11-AS1, SOX10, LINC01705, CXCL8, CADPS, GPM6B, SULT1C2, PTGS2
MIR3142HG, MLANA, DCT, FBXO32, PRSS35, LINC00513, HSPA6, SLC5A3, AL139383.1, HSPA1B
PC_ 2
Positive: MKI67, CENPF, ASPM, TOP2A, TPX2, TIMP3, PRC1, ANLN, BIRC5, RRM2
HJURP, IGFBP5, MT-ND3, VCL, STMN1, NUSAP1, AURKA, COL1A1, GTSE1, CCND1
COL6A1, KIF11, HIST1H1D, CENPE, CEP55, UBE2C, CCNB1, DEPDC1, CAV1, HMGB2
Negative: LINC00520, DDIT3, SAT1, GAS5, CSTB, AL118516.1, SNHG7, SNHG12, PDRG1, SNHG15
SQSTM1, ATP6V1F, MAP1LC3B, HMOX1, ATP6V0B, MT1X, ADRM1, ZFAND2A, AC003092.1, EIF5
ATF4, GADD45B, KLHL21, ODC1, SERTAD1, MED10, MED31, FBXO32, SNHG6, HSPA1A
PC_ 3
Positive: CENPF, MKI67, TPX2, ASPM, TOP2A, ARL6IP1, NUSAP1, UBE2C, CCNB1, BIRC5
PRC1, PTTG1, AURKA, GTSE1, RRM2, UBE2S, HSP90AA1, CEP55, JPT1, HSPA1A
ANLN, KPNA2, KIF2C, CKS2, CENPE, CENPA, HMMR, GADD45B, AURKB, CDKN3
Negative: TIMP3, IGFBP7, CAV1, SFRP1, CALD1, GNG11, CTHRC1, TMEM158, IGFBP2, COL6A1
IGFBP5, COL6A2, SCG2, TMSB4X, NQO1, BIRC7, S100A13, SERPINE2, CADM1, CCPG1
CHCHD10, CST3, RPL34, CCN4, AKR1B1, UBE2E3, COL1A1, KYNU, MPC2, LOXL2
PC_ 4
Positive: SERPINB2, TFPI2, RND3, TMSB4X, MMP1, IL24, SCG5, IER3, DKK1, EREG
AC003092.1, MMP3, LINC02376, CITED2, BASP1, IER2, ARL4C, KBTBD8, TNFRSF12A, ERRFI1
JUN, GDF15, LINC00973, DDIT3, CXCL8, IGFN1, IL1B, PPP1R15A, GADD45A, STC1
Negative: MLANA, S100B, SOX10, MIA, PMEL, TYR, GAS7, GPM6B, FXYD3, RAMP1
MFSD12, PLP1, NFATC2, DCT, SLC1A4, LHFPL3-AS1, SLAMF9, CDH19, CITED1, SORBS2
RAB38, ERBB3, TNS1, AL035541.1, SOX6, ZNF704, AKAP12, UCN2, COL9A3, A2M
PC_ 5
Positive: ARL4C, PRNP, FRMD4A, MME, CADM1, SRGN, IGFN1, TMSB4X, ARHGAP18, SIRPB1
SFRP1, SERPINB2, SPOCK1, BIRC7, GPNMB, MBP, MAP4K4, TNIK, LINC02376, SPP1
TMEM158, HAS2, TGFBR1, DNER, HCFC1R1, NR2F2, BACE2, CDC42EP3, PMP22, CAPG
Negative: PLAT, TRPA1, CTSC, IGFBP5, SLC14A1, TNFRSF11B, MMD, BOC, CBLN2, TMEM100
COLEC10, KYNU, TBX2, F3, C11orf96, FAM20C, FOXF1, BMP2, CDCP1, FENDRR
TMX4, HSD17B2, PCDH18, ACSL4, BAMBI, ISG15, PHACTR2, AKR1B1, SIX1, AL353653.1
ElbowPlot(dabtramtococl2) # The standard deviation seems to really level off at 10
# Recluster with the appropriate number of dimensions
dabtramtococl2 <- FindNeighbors(dabtramtococl2, dims = 1:15)
Computing nearest neighbor graph
Computing SNN
dabtramtococl2 <- FindClusters(dabtramtococl2, resolution = 0.5)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 10184
Number of edges: 315953
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8518
Number of communities: 11
Elapsed time: 1 seconds
dabtramtococl2 <- RunUMAP(dabtramtococl2, dims = 1:15)
10:49:12 UMAP embedding parameters a = 0.9922 b = 1.112
10:49:12 Read 10184 rows and found 15 numeric columns
10:49:12 Using Annoy for neighbor search, n_neighbors = 30
10:49:12 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
10:49:13 Writing NN index file to temp file /var/folders/ph/24prrxys02179y9_qzhxjgvc0000gn/T//Rtmpx16qOy/filec5f8d67dc94
10:49:13 Searching Annoy index using 1 thread, search_k = 3000
10:49:16 Annoy recall = 100%
10:49:16 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
10:49:17 Initializing from normalized Laplacian + noise (using irlba)
10:49:17 Commencing optimization for 200 epochs, with 431274 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
10:49:22 Optimization finished
DimPlot(dabtramtococl2, reduction = 'umap', pt.size = 1)
# Get the scaled data from the dabtramtococl2 object
dabtramtococl2_input_data <- GetAssayData(dabtramtococl2, assay = 'RNA', slot = 'scale.data')
# Build list of lineages with at least 5 cells in dab tram and do pearson correlation
dabtramtococl2_lin_pearson_list <- list()
for (i in fivecell_cDNA$DabTramtoCoCl2){
temp_pearson <- cor(dabtramtococl2_input_data[,colnames(dabtramtococl2)[dabtramtococl2$Lineage == i]])
temp_pearson_filt <- temp_pearson[lower.tri(temp_pearson, diag = FALSE)]
dabtramtococl2_lin_pearson_list[[i]] <- temp_pearson_filt
}
# Need to do a random sampling of the same thing
dabtramtococl2_lin_pearson_rand_list <- list()
num_iter <- 100
for(j in 1:num_iter){
dabtramtococl2_lin_pearson_rand_list[[j]] <- list()
for (i in fivecell_cDNA$DabTramtoCoCl2){
set.seed(j)
num_cells <- length(dabtramtococl2$Lineage[dabtramtococl2$Lineage == i])
temp_pearson <- cor(dabtramtococl2_input_data[,sample(colnames(dabtramtococl2), num_cells, replace = F)])
temp_pearson_filt <- temp_pearson[lower.tri(temp_pearson, diag = FALSE)]
dabtramtococl2_lin_pearson_rand_list[[j]][[i]] <- temp_pearson_filt
}
}
# Find the mean of the average pearson correlation per lineage
mean_pearson_dabtramtococl2 <- mean(unlist(lapply(dabtramtococl2_lin_pearson_list, mean))) # True mean of average correlations per lineage
means_pearson_dabtramtococl2_sim <- sapply(1:length(dabtramtococl2_lin_pearson_rand_list), function (y)
mean(unlist(lapply(dabtramtococl2_lin_pearson_rand_list[[y]], mean)))) # list of mean of average correlations per lineage
z_mean_pearson_dabtramtococl2 <- (mean_pearson_dabtramtococl2-mean(means_pearson_dabtramtococl2_sim))/sd(means_pearson_dabtramtococl2_sim) # Z score comparing mean to simulations
pval_mean_pearson_dabtramtococl2 <- pnorm(z_mean_pearson_dabtramtococl2, mean(means_pearson_dabtramtococl2_sim), sd(means_pearson_dabtramtococl2_sim), lower.tail = F) # calculate p value from z score
# Find the weighted means of the average pearson correlations per lineage
weighted_mean_pearson_dabtramtococl2 <- weighted.mean(unlist(lapply(dabtramtococl2_lin_pearson_list, mean)),
unlist(lapply(dabtramtococl2_lin_pearson_list, length))) # true weighted mean of average correlations per lineage
weighted_means_pearson_dabtramtococl2_sim <- sapply(1:length(dabtramtococl2_lin_pearson_rand_list), function(y)
weighted.mean(unlist(lapply(dabtramtococl2_lin_pearson_rand_list[[y]], mean)),
unlist(lapply(dabtramtococl2_lin_pearson_rand_list[[y]], length)))) # List of weighted means of pearson correlations
z_wmean_pearson_dabtramtococl2 <- (weighted_mean_pearson_dabtramtococl2-mean(weighted_means_pearson_dabtramtococl2_sim))/sd(weighted_means_pearson_dabtramtococl2_sim) # Z score comparing mean to simulations
pval_wmean_pearson_dabtramtococl2 <- pnorm(z_wmean_pearson_dabtramtococl2, mean(weighted_means_pearson_dabtramtococl2_sim), sd(weighted_means_pearson_dabtramtococl2_sim), lower.tail = F) # calculate p value from z score
# Compare each individual distribution of pearson correlations to the observed correlation by wilcoxon rank sum test and track pval
wilcox_pval_dabtramtococl2 <- c()
for (i in 1:length(dabtramtococl2_lin_pearson_rand_list)){
sim_means <- unlist(lapply(dabtramtococl2_lin_pearson_rand_list[[i]], mean))
wilcox_pval_dabtramtococl2 <- cbind(wilcox_pval_dabtramtococl2, wilcox.test(x = unlist(lapply(dabtramtococl2_lin_pearson_list, mean)),
y = sim_means, alternative = 'greater')$p.value)
}
# Save outputs
save(dabtramtococl2, dabtramtococl2_lin_pearson_list, dabtramtococl2_lin_pearson_rand_list, z_mean_pearson_dabtramtococl2, pval_mean_pearson_dabtramtococl2, z_wmean_pearson_dabtramtococl2, pval_wmean_pearson_dabtramtococl2, wilcox_pval_dabtramtococl2, file = '2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/dabtramtococl2_pearson_sim_results.RData')
rm(dabtramtococl2, dabtramtococl2_lin_pearson_list, dabtramtococl2_lin_pearson_rand_list, dabtramtococl2_input_data)
Idents(all_data) <- all_data$OG_condition # Change the idents to the OG condition for subsetting to cocl2
cocl2 <- subset(all_data, idents = 'cocl2') # Subset down to the cocl2 object
cocl2 <- NormalizeData(cocl2)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
cocl2 <- FindVariableFeatures(cocl2, selection.method = 'vst', nFeatures = 20000)
Warning: The following arguments are not used: nFeatures
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
cocl2 <- ScaleData(cocl2)
Centering and scaling data matrix
|
| | 0%
|
|============================================== | 50%
|
|===========================================================================================| 100%
cocl2 <- RunPCA(cocl2)
PC_ 1
Positive: MLANA, PMEL, H2AFZ, CAPG, MT-CYB, MITF, STMN1, MKI67, HMGB1, LGALS3
CHCHD6, TPX2, CENPF, FXYD3, MT-ND2, CDK2, RGS10, TOP2A, FRMD4B, ATOX1
ASPM, PIK3R3, ACTG1, NUSAP1, QPCT, TIMM50, TSPAN10, BIRC5, RPL12, AURKB
Negative: MT2A, UBC, MT1X, TNFRSF12A, JUN, FN1, CLEC2B, GNG11, TMEM158, PPP1R15A
FTL, RGS2, ANXA1, AC003092.1, TFPI2, ANGPTL4, DKK1, LINC00973, BASP1, PHLDA1
PRNP, CITED2, DDIT3, LUCAT1, EIF1, GAS5, MALAT1, IER2, VGF, FTH1
PC_ 2
Positive: SAT1, PMEL, ERBB3, PDE3B, RAMP1, APOE, RPL23, GAS5, TYR, TDRD3
NFATC2, MIA, RGS10, LHFPL3-AS1, EPB41L4A-AS1, RAB38, MLANA, C11orf96, ZFAS1, TFAP2A
SLC44A1, MYC, CBLB, BCAN, NEAT1, TRMT9B, PIK3R3, EDNRB, STK32A, CADPS
Negative: UBE2S, TUBA1B, TUBB, MKI67, CENPF, UBE2C, CKS1B, HMGB2, TPX2, TOP2A
BIRC5, NUSAP1, GTSE1, ACTB, RRM2, TK1, DTYMK, PRC1, ANLN, FOSL1
CALM2, UBE2T, SMC4, TUBB4B, TMSB4X, HIST1H4C, PCLAF, CDK1, C12orf75, CKS2
PC_ 3
Positive: HSPA1A, GADD45B, SNHG12, DDIT3, SNHG7, HSPA1B, ZFAS1, SNHG1, GAS5, KBTBD8
CKS2, SERTAD1, PDRG1, SNHG15, ZFAND2A, ATF3, MAPKAPK5-AS1, HSPH1, PCLO, DNAJB1
ATF4, PPP1R15A, HIST1H4H, MIR4300HG, HSP90AA1, IER2, PIGL, IGFL2-AS1, IFRD1, LINC00520
Negative: SERPINE2, S100A6, TIMP3, CST3, TIMP1, GNAS, S100A13, LY6E, SH3BGRL3, FN1
CCND1, TMEM158, CAV1, PRSS35, MIA, VKORC1, S100A1, CADM1, SLC20A1, PRNP
PRSS23, ME1, CTHRC1, FGFBP2, DCBLD2, CDKN2A, P4HB, CALD1, PRRX1, SPARC
PC_ 4
Positive: FN1, CD44, THBS1, MALAT1, SRGN, DST, MAP4K4, AHNAK, MT-ND6, IGFBP7
F2R, IGFN1, LMO7, NEAT1, COL6A2, ASPM, MMP2, SERPINB2, EREG, SCG2
ARL4C, IL6ST, PAG1, GABPB1-AS1, HIST1H1D, CBLB, TXNRD1, MYOF, TFPI2, F3
Negative: NDUFC2, PSMA7, CSTB, ATOX1, SEC61G, NDUFA4, UPP1, FAM162A, PRDX1, LGALS3
CALM1, BIRC7, ATP6V0B, MAP1LC3B, TXNDC17, BNIP3, C4orf3, RAB5IF, ATP5MC3, VKORC1
SEC11C, QPCT, UCN2, DBI, GSTP1, TMSB10, PGK1, MIA, S100A11, GNAS
PC_ 5
Positive: RPL12, LGALS1, FTL, SRGN, FTH1, TFPI2, SERPINB2, TMSB4X, TMSB10, TXN
RPL23, SCG5, C12orf75, CHCHD10, LINC02376, MYEOV, BASP1, ATP5MC3, MMP1, DBI
GYPC, ACTG1, S100A11, F3, CSF1, IGFBP7, RGS10, BOLA3, VEGFC, KYNU
Negative: MALAT1, NEAT1, SLC5A3, VMP1, DST, FAT1, SERPINE2, ZFYVE16, NFKBIZ, TXNIP
GABPB1-AS1, SGK1, CADM1, SLC2A3, USP53, APOD, COL9A3, SLC20A1, HSP90B1, CDH19
MFGE8, PRSS35, GAS7, SNHG14, CBLB, LEF1, GLS, NORAD, FGFBP2, SPARC
ElbowPlot(cocl2) # The standard deviation seems to really level off at 10
# Recluster with the appropriate number of dimensions
cocl2 <- FindNeighbors(cocl2, dims = 1:15)
Computing nearest neighbor graph
Computing SNN
cocl2 <- FindClusters(cocl2, resolution = 0.5)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 4823
Number of edges: 164423
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8480
Number of communities: 11
Elapsed time: 0 seconds
cocl2 <- RunUMAP(cocl2, dims = 1:15)
10:52:30 UMAP embedding parameters a = 0.9922 b = 1.112
10:52:30 Read 4823 rows and found 15 numeric columns
10:52:30 Using Annoy for neighbor search, n_neighbors = 30
10:52:30 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
10:52:31 Writing NN index file to temp file /var/folders/ph/24prrxys02179y9_qzhxjgvc0000gn/T//Rtmpx16qOy/filec5f81dc27f6c
10:52:31 Searching Annoy index using 1 thread, search_k = 3000
10:52:32 Annoy recall = 100%
10:52:32 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
10:52:33 Initializing from normalized Laplacian + noise (using irlba)
10:52:33 Commencing optimization for 500 epochs, with 201272 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
10:52:40 Optimization finished
DimPlot(cocl2, reduction = 'umap', pt.size = 1)
# Get the scaled data from the cocl2 object
cocl2_input_data <- GetAssayData(cocl2, assay = 'RNA', slot = 'scale.data')
# Build list of lineages with at least 5 cells in dab tram and do pearson correlation
cocl2_lin_pearson_list <- list()
for (i in fivecell_cDNA$CoCl2){
temp_pearson <- cor(cocl2_input_data[,colnames(cocl2)[cocl2$Lineage == i]])
temp_pearson_filt <- temp_pearson[lower.tri(temp_pearson, diag = FALSE)]
cocl2_lin_pearson_list[[i]] <- temp_pearson_filt
}
# Need to do a random sampling of the same thing
cocl2_lin_pearson_rand_list <- list()
num_iter <- 100
for(j in 1:num_iter){
cocl2_lin_pearson_rand_list[[j]] <- list()
for (i in fivecell_cDNA$CoCl2){
set.seed(j)
num_cells <- length(cocl2$Lineage[cocl2$Lineage == i])
temp_pearson <- cor(cocl2_input_data[,sample(colnames(cocl2), num_cells, replace = F)])
temp_pearson_filt <- temp_pearson[lower.tri(temp_pearson, diag = FALSE)]
cocl2_lin_pearson_rand_list[[j]][[i]] <- temp_pearson_filt
}
}
# Find the mean of the average pearson correlation per lineage
mean_pearson_cocl2 <- mean(unlist(lapply(cocl2_lin_pearson_list, mean))) # True mean of average correlations per lineage
means_pearson_cocl2_sim <- sapply(1:length(cocl2_lin_pearson_rand_list), function (y)
mean(unlist(lapply(cocl2_lin_pearson_rand_list[[y]], mean)))) # list of mean of average correlations per lineage
z_mean_pearson_cocl2 <- (mean_pearson_cocl2-mean(means_pearson_cocl2_sim))/sd(means_pearson_cocl2_sim) # Z score comparing mean to simulations
pval_mean_pearson_cocl2 <- pnorm(z_mean_pearson_cocl2, mean(means_pearson_cocl2_sim), sd(means_pearson_cocl2_sim), lower.tail = F) # calculate p value from z score
# Find the weighted means of the average pearson correlations per lineage
weighted_mean_pearson_cocl2 <- weighted.mean(unlist(lapply(cocl2_lin_pearson_list, mean)),
unlist(lapply(cocl2_lin_pearson_list, length))) # true weighted mean of average correlations per lineage
weighted_means_pearson_cocl2_sim <- sapply(1:length(cocl2_lin_pearson_rand_list), function(y)
weighted.mean(unlist(lapply(cocl2_lin_pearson_rand_list[[y]], mean)),
unlist(lapply(cocl2_lin_pearson_rand_list[[y]], length)))) # List of weighted means of pearson correlations
z_wmean_pearson_cocl2 <- (weighted_mean_pearson_cocl2-mean(weighted_means_pearson_cocl2_sim))/sd(weighted_means_pearson_cocl2_sim) # Z score comparing mean to simulations
pval_wmean_pearson_cocl2 <- pnorm(z_wmean_pearson_cocl2, mean(weighted_means_pearson_cocl2_sim), sd(weighted_means_pearson_cocl2_sim), lower.tail = F) # calculate p value from z score
# Compare each individual distribution of pearson correlations to the observed correlation by wilcoxon rank sum test and track pval
wilcox_pval_cocl2 <- c()
for (i in 1:length(cocl2_lin_pearson_rand_list)){
sim_means <- unlist(lapply(cocl2_lin_pearson_rand_list[[i]], mean))
wilcox_pval_cocl2 <- cbind(wilcox_pval_cocl2, wilcox.test(x = unlist(lapply(cocl2_lin_pearson_list, mean)),
y = sim_means, alternative = 'greater')$p.value)
}
# Save outputs
save(cocl2, cocl2_lin_pearson_list, cocl2_lin_pearson_rand_list, z_mean_pearson_cocl2, pval_mean_pearson_cocl2, z_wmean_pearson_cocl2, pval_wmean_pearson_cocl2, wilcox_pval_cocl2, file = '2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/cocl2_pearson_sim_results.RData')
rm(cocl2, cocl2_lin_pearson_list, cocl2_lin_pearson_rand_list, cocl2_input_data)
Idents(all_data) <- all_data$OG_condition # Change the idents to the OG condition for subsetting to cocl2tococl2
cocl2tococl2 <- subset(all_data, idents = 'cocl2tococl2') # Subset down to the cocl2tococl2 object
cocl2tococl2 <- NormalizeData(cocl2tococl2)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
cocl2tococl2 <- FindVariableFeatures(cocl2tococl2, selection.method = 'vst', nFeatures = 20000)
Warning: The following arguments are not used: nFeatures
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
cocl2tococl2 <- ScaleData(cocl2tococl2)
Centering and scaling data matrix
|
| | 0%
|
|============================================== | 50%
|
|===========================================================================================| 100%
cocl2tococl2 <- RunPCA(cocl2tococl2)
PC_ 1
Positive: RPS8, RPL10, RPS2, RPS3, RPS14, GAPDH, RPL28, RPL41, FTH1, RPS5
RPS12, RPS16, RPL26, EEF1A1, RPL11, RPS15A, RPL12, FTL, RPS4X, RPL29
RPS11, RPL7A, RPL15, RPL8, RPL17, RPS6, RPS3A, RPS23, RPS13, RPL19
Negative: MALAT1, NEAT1, WSB1, PLAC4, MT-ND6, NFKBIZ, LINC00488, SNHG14, PAXBP1, MT-ATP6
HPS4, HMGA2-AS1, SLC5A3, HSPA1B, NKTR, HSPA1A, ANKRD11, PRR26, AL162426.1, POLR2J3
CLCN7, CCDC144A, VMP1, FTX, AC005632.3, AC003681.1, ATP1A1-AS1, AC058791.1, PLCG2, N4BP2L2
PC_ 2
Positive: NEAT1, MKI67, CENPF, ASPM, MALAT1, ANKRD11, GTSE1, CIT, POLR2J3, FRMD4B
TPX2, SLC7A11, PRKDC, PRC1, TOP2A, SFPQ, NAV2, NUSAP1, ZFYVE16, UBE2C
MITF, HPS4, WSB1, HSP90B1, RAD21, MAT2A, SLC20A1, SLC25A37, CALR, AHNAK
Negative: RPL26, RPS12, RPL41, RPS23, FTH1, RPL10, RPL11, RPS16, RPS14, RPS2
FTL, RPS11, RPS3A, RPL34, RPS3, RPL28, RPL29, RPS4X, RPL17, RPL7A
RPS15A, EEF1A1, RPL35A, RPL8, RPL15, RPL23, RPS13, RPS5, GAPDH, RPS6
PC_ 3
Positive: NEAT1, GABPB1-AS1, SAT1, SNHG14, RPS8, ZKSCAN1, PRAME, LHFPL3-AS1, POLR2J3, ERBB3
TRIM25, PLEC, FAT1, CADPS, BTG1, CCNI, CBLB, N4BP2L2, TXNIP, TFAP2A
SLC25A37, DANCR, SLC5A3, ZNF106, SLC2A3, SCD, MT-ND3, SEZ6L2, CSPG4, PDE3B
Negative: CENPF, MKI67, TPX2, UBE2C, TOP2A, ASPM, HMGB2, AURKA, NUSAP1, CCNB1
PRC1, UBE2S, GTSE1, BIRC5, KIF2C, KPNA2, PLK1, PTTG1, CEP55, HMMR
CKAP2, CKS2, ARL6IP1, TUBB4B, CCNA2, CENPE, CDC20, PRR11, TUBA1B, DLGAP5
PC_ 4
Positive: MT-ATP6, MT-ND3, GABPB1-AS1, LHFPL3-AS1, MT-ND6, PIK3R3, LINC00488, MIR3142HG, SLC16A1-AS1, ZKSCAN1
N4BP2L2, HPS4, ASPM, FRMD4B, FTX, NFIA, HIPK2, SEMA6A, ERBB3, TSPAN10
PDE3B, AC063923.2, LRRC75A, NPAT, TRIM73, PRKDC, RPS3, AC008170.1, HELLS, OR2AT4
Negative: MT2A, IER3, HSPA1A, DNAJB1, NDRG1, ADM, HSPA1B, S100A6, TMEM158, TIMP1
HSP90AA1, MT1X, RGS2, SLC5A3, DDIT4, SERPINE2, FN1, HILPDA, SLC20A1, SH3BGRL3
GAPDH, TIMP3, VGF, FTL, TMSB10, PPP1R15A, PRSS35, RND3, PLIN2, SPP1
PC_ 5
Positive: HSPA1A, DNAJB1, HSPA1B, SAT1, HSP90AA1, RGS1, NFKBIZ, HPS4, PPP1R15A, KLF6
SLC2A3, CHORDC1, GAS5, SNHG7, IRS2, BBC3, ID3, C11orf96, FOS, ID2
RPL10, NKTR, MALAT1, RGS10, TECR, YPEL2, ZFP36L1, BTG1, EPB41L4A-AS1, NR4A1
Negative: MT-ATP6, MT-ND3, S100A6, MT-ND6, FN1, SH3BGRL3, DCBLD2, TMSB10, TMSB4X, HNRNPAB
FTL, VGF, TMEM158, FTH1, STC1, FABP5, TXNRD1, PRKDC, PHLDA2, GABPB1-AS1
TPM3, CFL1, TNFRSF12A, PFN1, PKM, ATOX1, PRDX1, CALM1, ATAD2, YWHAB
ElbowPlot(cocl2tococl2) # The standard deviation seems to really level off at 10
# Recluster with the appropriate number of dimensions
cocl2tococl2 <- FindNeighbors(cocl2tococl2, dims = 1:15)
Computing nearest neighbor graph
Computing SNN
cocl2tococl2 <- FindClusters(cocl2tococl2, resolution = 0.5)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 13951
Number of edges: 400713
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8156
Number of communities: 8
Elapsed time: 2 seconds
cocl2tococl2 <- RunUMAP(cocl2tococl2, dims = 1:15)
10:55:09 UMAP embedding parameters a = 0.9922 b = 1.112
10:55:09 Read 13951 rows and found 15 numeric columns
10:55:09 Using Annoy for neighbor search, n_neighbors = 30
10:55:09 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
10:55:10 Writing NN index file to temp file /var/folders/ph/24prrxys02179y9_qzhxjgvc0000gn/T//Rtmpx16qOy/filec5f84c3b9ab7
10:55:10 Searching Annoy index using 1 thread, search_k = 3000
10:55:14 Annoy recall = 100%
10:55:15 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
10:55:15 Initializing from normalized Laplacian + noise (using irlba)
10:55:15 Commencing optimization for 200 epochs, with 602270 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
10:55:23 Optimization finished
DimPlot(cocl2tococl2, reduction = 'umap', pt.size = 1)
# Get the scaled data from the cocl2tococl2 object
cocl2tococl2_input_data <- GetAssayData(cocl2tococl2, assay = 'RNA', slot = 'scale.data')
# Build list of lineages with at least 5 cells in dab tram and do pearson correlation
cocl2tococl2_lin_pearson_list <- list()
for (i in fivecell_cDNA$CoCl2toCoCl2){
temp_pearson <- cor(cocl2tococl2_input_data[,colnames(cocl2tococl2)[cocl2tococl2$Lineage == i]])
temp_pearson_filt <- temp_pearson[lower.tri(temp_pearson, diag = FALSE)]
cocl2tococl2_lin_pearson_list[[i]] <- temp_pearson_filt
}
# Need to do a random sampling of the same thing
cocl2tococl2_lin_pearson_rand_list <- list()
num_iter <- 100
for(j in 1:num_iter){
cocl2tococl2_lin_pearson_rand_list[[j]] <- list()
for (i in fivecell_cDNA$CoCl2toCoCl2){
set.seed(j)
num_cells <- length(cocl2tococl2$Lineage[cocl2tococl2$Lineage == i])
temp_pearson <- cor(cocl2tococl2_input_data[,sample(colnames(cocl2tococl2), num_cells, replace = F)])
temp_pearson_filt <- temp_pearson[lower.tri(temp_pearson, diag = FALSE)]
cocl2tococl2_lin_pearson_rand_list[[j]][[i]] <- temp_pearson_filt
}
}
# Find the mean of the average pearson correlation per lineage
mean_pearson_cocl2tococl2 <- mean(unlist(lapply(cocl2tococl2_lin_pearson_list, mean))) # True mean of average correlations per lineage
means_pearson_cocl2tococl2_sim <- sapply(1:length(cocl2tococl2_lin_pearson_rand_list), function (y)
mean(unlist(lapply(cocl2tococl2_lin_pearson_rand_list[[y]], mean)))) # list of mean of average correlations per lineage
z_mean_pearson_cocl2tococl2 <- (mean_pearson_cocl2tococl2-mean(means_pearson_cocl2tococl2_sim))/sd(means_pearson_cocl2tococl2_sim) # Z score comparing mean to simulations
pval_mean_pearson_cocl2tococl2 <- pnorm(z_mean_pearson_cocl2tococl2, mean(means_pearson_cocl2tococl2_sim), sd(means_pearson_cocl2tococl2_sim), lower.tail = F) # calculate p value from z score
# Find the weighted means of the average pearson correlations per lineage
weighted_mean_pearson_cocl2tococl2 <- weighted.mean(unlist(lapply(cocl2tococl2_lin_pearson_list, mean)),
unlist(lapply(cocl2tococl2_lin_pearson_list, length))) # true weighted mean of average correlations per lineage
weighted_means_pearson_cocl2tococl2_sim <- sapply(1:length(cocl2tococl2_lin_pearson_rand_list), function(y)
weighted.mean(unlist(lapply(cocl2tococl2_lin_pearson_rand_list[[y]], mean)),
unlist(lapply(cocl2tococl2_lin_pearson_rand_list[[y]], length)))) # List of weighted means of pearson correlations
z_wmean_pearson_cocl2tococl2 <- (weighted_mean_pearson_cocl2tococl2-mean(weighted_means_pearson_cocl2tococl2_sim))/sd(weighted_means_pearson_cocl2tococl2_sim) # Z score comparing mean to simulations
pval_wmean_pearson_cocl2tococl2 <- pnorm(z_wmean_pearson_cocl2tococl2, mean(weighted_means_pearson_cocl2tococl2_sim), sd(weighted_means_pearson_cocl2tococl2_sim), lower.tail = F) # calculate p value from z score
# Compare each individual distribution of pearson correlations to the observed correlation by wilcoxon rank sum test and track pval
wilcox_pval_cocl2tococl2 <- c()
for (i in 1:length(cocl2tococl2_lin_pearson_rand_list)){
sim_means <- unlist(lapply(cocl2tococl2_lin_pearson_rand_list[[i]], mean))
wilcox_pval_cocl2tococl2 <- cbind(wilcox_pval_cocl2tococl2, wilcox.test(x = unlist(lapply(cocl2tococl2_lin_pearson_list, mean)),
y = sim_means, alternative = 'greater')$p.value)
}
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2tococl2_lin_pearson_list, :
cannot compute exact p-value with ties
# Save outputs
save(cocl2tococl2, cocl2tococl2_lin_pearson_list, cocl2tococl2_lin_pearson_rand_list, z_mean_pearson_cocl2tococl2, pval_mean_pearson_cocl2tococl2, z_wmean_pearson_cocl2tococl2, pval_wmean_pearson_cocl2tococl2, wilcox_pval_cocl2tococl2, file = '2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/cocl2tococl2_pearson_sim_results.RData')
rm(cocl2tococl2, cocl2tococl2_lin_pearson_list, cocl2tococl2_lin_pearson_rand_list, cocl2tococl2_input_data)
Idents(all_data) <- all_data$OG_condition # Change the idents to the OG condition for subsetting to dabtram
cocl2tocis <- subset(all_data, idents = 'cocl2tocis') # Subset down to the cocl2 object
cocl2tocis <- NormalizeData(cocl2tocis)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
cocl2tocis <- FindVariableFeatures(cocl2tocis, selection.method = 'vst', nFeatures = 20000)
Warning: The following arguments are not used: nFeatures
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
cocl2tocis <- ScaleData(cocl2tocis)
Centering and scaling data matrix
|
| | 0%
|
|============================================== | 50%
|
|===========================================================================================| 100%
cocl2tocis <- RunPCA(cocl2tocis)
PC_ 1
Positive: MALAT1, SAT1, GAS5, RPL26, HSPA1A, SNHG7, PMEL, NEAT1, HSPA1B, NDRG1
RPS3, CREBRF, EPB41L4A-AS1, RPL7A, LINC01531, CLK1, SLC2A3, RGS2, APOD, NT5C3A
NUPR1, MYC, HIST3H2A, NHLRC3, PNRC1, SNHG14, RGS1, SLC5A3, DNAJB1, SNHG29
Negative: MKI67, ACTB, TUBA1B, MT-ATP6, HNRNPAB, PRKDC, CFL1, HMGB1, H2AFZ, TPX2
PARP1, CENPF, HIST1H4C, PFN1, CALR, DYNLL1, TMPO, YWHAB, HNRNPD, UBE2S
TPM3, DTYMK, NME1, YWHAZ, TUBB, CYCS, JPT1, PKM, AP2S1, ARPC1B
PC_ 2
Positive: FTL, LY6E, FTH1, MT-ND3, MT-CYB, MT-ATP6, MT-CO2, ARPC1B, RPS8, SERF2
PKM, RPL12, TMSB10, SRM, S100A1, MT-ND1, NME4, MT-ND2, S100B, TPI1
MARCKSL1, CYBA, CHCHD10, ATP5F1E, CFL1, METRN, SH3BGRL3, DBI, AP2S1, POLR2F
Negative: ASPM, CENPF, MKI67, TPX2, TOP2A, NUSAP1, UBE2C, CENPE, AURKA, GTSE1
CCNB1, HMGB2, PRC1, KIF14, DEPDC1, ARL6IP1, SGO2, HMMR, PLK1, KIF2C
CENPA, TUBB4B, ANLN, NEK2, KPNA2, CCNA2, KIF23, NUF2, CEP55, DLGAP5
PC_ 3
Positive: GAPDH, BNIP3, ACTG1, RPS8, CSTB, SNHG29, FTL, PGK1, BTG1, RPS3
MIF, DDIT4, LDHA, RPL12, GAS5, RPL26, SCD, MLANA, MYC, EMP1
TPI1, SNHG7, NT5C3A, SLAMF9, SH3BP4, S100A10, RGS10, ENO2, RPL29, FAM162A
Negative: ATAD2, MCM4, MT-ATP6, CLSPN, HELLS, PCNA, XRCC2, BRCA1, HIST1H1B, MT-ND6
DTL, MCM3, MCM6, HIST1H1D, MT-CO2, GINS2, CCNE2, BRCA2, RRM2, E2F1
POLD3, HIST2H2AC, MT-CYB, CHAF1A, MCM10, FANCA, CDC6, MCM7, FEN1, MCM5
PC_ 4
Positive: NEAT1, MALAT1, FN1, NDRG1, DDIT4, GAPDH, BNIP3, SLC5A3, SLC2A3, RGS2
SERPINE2, ADM, NT5C3A, BHLHE40, PPP1R15A, TNS1, A2M, ZFP36L1, ATAD2, CAV1
TIMP3, HIST1H1B, ENO2, NRN1, BIRC7, FGFBP2, CADM1, RND3, COL6A1, CCDC85B
Negative: PRDX1, CD63, ATOX1, AKR1A1, PSMA7, H2AFZ, MT-ATP6, DYNLL1, JPT1, PTTG1
UBE2S, FABP5, HMGB1, TXN, NDUFC2, DBI, CDKN3, POMP, CCNB1, CKS2
ARPC1B, HSP90AA1, CALM1, PMEL, PFN1, MLANA, EIF5A, CHCHD6, DCT, HMGB3
PC_ 5
Positive: FN1, AHNAK, IER3, MT-CO2, MT-ATP6, MT-ND1, SERPINE2, MT-CYB, MT-ND2, CCND1
SLC20A1, MT-ND3, MT-ND6, SORBS2, COL6A1, IGFBP7, CAV1, LGALS1, S100A4, NFATC2
ANXA2, PRRX1, SPP1, PRNP, ATP2B1, CALU, PLEC, ANXA1, PCOLCE, CDK6
Negative: RPL26, RPS3, HIST1H1B, HIST1H4C, PMEL, RPL7A, RPL29, HIST1H1D, MLANA, HIST2H2AC
SNHG29, RPS8, RGS10, RPL12, RRM2, PCLAF, HIST1H3D, HIST1H1E, ATAD2, AURKB
GAPDH, HIST1H1C, HIST1H2AH, CLSPN, HELLS, BRCA1, FAM111A, GAS5, HIST1H1A, CSTB
ElbowPlot(cocl2tocis) # The standard deviation seems to really level off at 10
# Recluster with the appropriate number of dimensions
cocl2tocis <- FindNeighbors(cocl2tocis, dims = 1:15)
Computing nearest neighbor graph
Computing SNN
cocl2tocis <- FindClusters(cocl2tocis, resolution = 0.5)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 6459
Number of edges: 229291
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8415
Number of communities: 9
Elapsed time: 0 seconds
cocl2tocis <- RunUMAP(cocl2tocis, dims = 1:15)
15:37:19 UMAP embedding parameters a = 0.9922 b = 1.112
15:37:19 Read 6459 rows and found 15 numeric columns
15:37:19 Using Annoy for neighbor search, n_neighbors = 30
15:37:19 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:37:20 Writing NN index file to temp file /var/folders/ph/24prrxys02179y9_qzhxjgvc0000gn/T//Rtmpx16qOy/filec5f83ce09dfb
15:37:20 Searching Annoy index using 1 thread, search_k = 3000
15:37:22 Annoy recall = 100%
15:37:28 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
15:37:29 Initializing from normalized Laplacian + noise (using irlba)
15:37:29 Commencing optimization for 500 epochs, with 272244 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:37:38 Optimization finished
DimPlot(cocl2tocis, reduction = 'umap', pt.size = 1)
# Get the scaled data from the cocl2tocis object
cocl2tocis_input_data <- GetAssayData(cocl2tocis, assay = 'RNA', slot = 'scale.data')
# Build list of lineages with at least 5 cells in dab tram and do pearson correlation
cocl2tocis_lin_pearson_list <- list()
for (i in fivecell_cDNA$CoCl2toCis){
temp_pearson <- cor(cocl2tocis_input_data[,colnames(cocl2tocis)[cocl2tocis$Lineage == i]])
temp_pearson_filt <- temp_pearson[lower.tri(temp_pearson, diag = FALSE)]
cocl2tocis_lin_pearson_list[[i]] <- temp_pearson_filt
}
# Need to do a random sampling of the same thing
cocl2tocis_lin_pearson_rand_list <- list()
num_iter <- 100
for(j in 1:num_iter){
cocl2tocis_lin_pearson_rand_list[[j]] <- list()
for (i in fivecell_cDNA$CoCl2toCis){
set.seed(j)
num_cells <- length(cocl2tocis$Lineage[cocl2tocis$Lineage == i])
temp_pearson <- cor(cocl2tocis_input_data[,sample(colnames(cocl2tocis), num_cells, replace = F)])
temp_pearson_filt <- temp_pearson[lower.tri(temp_pearson, diag = FALSE)]
cocl2tocis_lin_pearson_rand_list[[j]][[i]] <- temp_pearson_filt
}
}
# Find the mean of the average pearson correlation per lineage
mean_pearson_cocl2tocis <- mean(unlist(lapply(cocl2tocis_lin_pearson_list, mean))) # True mean of average correlations per lineage
means_pearson_cocl2tocis_sim <- sapply(1:length(cocl2tocis_lin_pearson_rand_list), function (y)
mean(unlist(lapply(cocl2tocis_lin_pearson_rand_list[[y]], mean)))) # list of mean of average correlations per lineage
z_mean_pearson_cocl2tocis <- (mean_pearson_cocl2tocis-mean(means_pearson_cocl2tocis_sim))/sd(means_pearson_cocl2tocis_sim) # Z score comparing mean to simulations
pval_mean_pearson_cocl2tocis <- pnorm(z_mean_pearson_cocl2tocis, mean(means_pearson_cocl2tocis_sim), sd(means_pearson_cocl2tocis_sim), lower.tail = F) # calculate p value from z score
# Find the weighted means of the average pearson correlations per lineage
weighted_mean_pearson_cocl2tocis <- weighted.mean(unlist(lapply(cocl2tocis_lin_pearson_list, mean)),
unlist(lapply(cocl2tocis_lin_pearson_list, length))) # true weighted mean of average correlations per lineage
weighted_means_pearson_cocl2tocis_sim <- sapply(1:length(cocl2tocis_lin_pearson_rand_list), function(y)
weighted.mean(unlist(lapply(cocl2tocis_lin_pearson_rand_list[[y]], mean)),
unlist(lapply(cocl2tocis_lin_pearson_rand_list[[y]], length)))) # List of weighted means of pearson correlations
z_wmean_pearson_cocl2tocis <- (weighted_mean_pearson_cocl2tocis-mean(weighted_means_pearson_cocl2tocis_sim))/sd(weighted_means_pearson_cocl2tocis_sim) # Z score comparing mean to simulations
pval_wmean_pearson_cocl2tocis <- pnorm(z_wmean_pearson_cocl2tocis, mean(weighted_means_pearson_cocl2tocis_sim), sd(weighted_means_pearson_cocl2tocis_sim), lower.tail = F) # calculate p value from z score
# Compare each individual distribution of pearson correlations to the observed correlation by wilcoxon rank sum test and track pval
wilcox_pval_cocl2tocis <- c()
for (i in 1:length(cocl2tocis_lin_pearson_rand_list)){
sim_means <- unlist(lapply(cocl2tocis_lin_pearson_rand_list[[i]], mean))
wilcox_pval_cocl2tocis <- cbind(wilcox_pval_cocl2tocis, wilcox.test(x = unlist(lapply(cocl2tocis_lin_pearson_list, mean)),
y = sim_means, alternative = 'greater')$p.value)
}
# Save outputs
save(cocl2tocis, cocl2tocis_lin_pearson_list, cocl2tocis_lin_pearson_rand_list, z_mean_pearson_cocl2tocis, pval_mean_pearson_cocl2tocis, z_wmean_pearson_cocl2tocis, pval_wmean_pearson_cocl2tocis, wilcox_pval_cocl2tocis, file = '2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/cocl2tocis_pearson_sim_results.RData')
rm(cocl2tocis, cocl2tocis_lin_pearson_list, cocl2tocis_lin_pearson_rand_lis, cocl2tocis_input_data)
Warning in rm(cocl2tocis, cocl2tocis_lin_pearson_list, cocl2tocis_lin_pearson_rand_lis, :
object 'cocl2tocis_lin_pearson_rand_lis' not found
Idents(all_data) <- all_data$OG_condition # Change the idents to the OG condition for subsetting to cocl2todabtram
cocl2todabtram <- subset(all_data, idents = 'cocl2todabtram') # Subset down to the cocl2todabtram object
cocl2todabtram <- NormalizeData(cocl2todabtram)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
cocl2todabtram <- FindVariableFeatures(cocl2todabtram, selection.method = 'vst', nFeatures = 20000)
Warning: The following arguments are not used: nFeatures
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
cocl2todabtram <- ScaleData(cocl2todabtram)
Centering and scaling data matrix
|
| | 0%
|
|============================================== | 50%
|
|===========================================================================================| 100%
cocl2todabtram <- RunPCA(cocl2todabtram)
PC_ 1
Positive: GAS7, SOX10, COL9A3, AKAP12, S100B, NFATC2, SOX6, PHACTR1, IRS2, TFAP2A
EPB41L3, OLFML2A, GPM6B, HSPG2, TSC22D1, EVI5, SEMA3B, PLP1, CRYAB, SCARB1
ERBB3, PDZRN3, LIMA1, CBLB, SOX4, PALLD, PMP22, AL139383.1, NCAM1, C1orf198
Negative: BASP1, CAV1, PHLDA1, COL1A1, COL6A2, GYPC, CCND1, IL6ST, SFRP1, TXNRD1
COL6A1, CDC25B, SLC12A8, ANXA1, ARHGAP18, PCLAF, C12orf75, TK1, CEMIP, VEGFC
CYTOR, TXN, PERP, ATP2B1, EGFR, ANLN, MKI67, CAPG, MAP1B, TFPI2
PC_ 2
Positive: DAB2, SLC12A8, NUPR1, LGALS3, ARL4C, CTSK, ITGA11, PLXNC1, IL6ST, SGK1
BIRC7, ITGB8, CHCHD10, PDGFRB, GPNMB, TXNRD1, OLFML2B, LINC00968, COL12A1, C1S
C1R, CEBPB, DEPTOR, BAMBI, NABP1, COL14A1, FTH1, SPON2, MTSS1, ITGAV
Negative: MKI67, CENPF, ANLN, GTSE1, TPX2, CEP55, ASPM, PRC1, NUSAP1, TOP2A
UBE2C, BIRC5, NUF2, UBE2S, H2AFX, KIF11, CDCA2, HMGB2, DEPDC1, NCAPG
CCNA2, CENPA, CENPE, RRM2, HMMR, KIF23, CCNB1, STMN1, KIF2C, LMNB1
PC_ 3
Positive: LOXL2, FN1, THBS1, SPARC, CALD1, COL5A2, MFGE8, HSPG2, FSTL1, TIMP3
CSRP2, MMP2, FGFR1, S100A6, LMO7, SERPINE2, CDC42EP3, ALCAM, PRNP, IFI6
MCAM, NUAK1, GREM2, PALLD, ARID5B, EVI5, RAPH1, OLFML2A, COL6A1, CTHRC1
Negative: PMEL, MLANA, CAPN3, LINC00520, TDRD3, LGALS3, BHLHE41, ID2, SULT1C2, AP1S2
AC004988.1, CITED1, ADGRG1, STK32A, CDK2, RAB38, TYR, BAAT, FBXO32, ATP6V0D2
CHCHD6, GAS5, TRPM1, TRIM63, ABCB5, BCL2A1, EDNRB, AL162457.2, SNHG12, SNHG7
PC_ 4
Positive: GCLM, VCAM1, SLC7A11, FOXF1, KYNU, COL14A1, PKDCC, PTGR1, RHOBTB3, CTSC
CASP1, PGD, TRIM16L, ASPH, C1R, FTH1, MGST1, PCDH18, SQSTM1, TBX2
CEBPD, IGFBP5, FENDRR, SRXN1, LINC01914, GSTM3, HHEX, SDCBP, BAMBI, SLC3A2
Negative: ITGA3, PRSS23, CPA4, SERPINE1, FRMD4A, TNIK, GCNT1, TIMP3, PHLDB2, C12orf75
ABI3BP, DKK1, PLP2, BDNF, EVI2A, AXL, FGF5, ATP1B1, F2R, PDGFC
SPOCD1, GRAMD2B, MT2A, RPS27L, AC092807.3, TPST2, MEGF6, LMO7, SRGN, VGF
PC_ 5
Positive: HELLS, ATAD2, DTL, MCM3, CCNE2, CLSPN, BRCA1, E2F1, MCM4, FANCA
POLD3, GINS2, MCM5, HIST1H1B, CDC6, MCM6, PCNA, BARD1, BRIP1, CHAF1A
DSN1, CDT1, FEN1, DHFR, CDC45, MCM2, HIST1H2AH, ZNF367, ATAD5, HIST1H4C
Negative: CCNB1, CDC20, AURKA, NEK2, KIF20A, CCNB2, PLK1, CDKN3, DLGAP5, PTTG1
DEPDC1, UBE2S, HMGB3, CENPE, PRR11, CENPA, BIRC5, HMMR, PIMREG, JPT1
KIF14, CDCA8, TROAP, PRC1, TPX2, KIF2C, KIF4A, CKS2, CDCA3, KNSTRN
ElbowPlot(cocl2todabtram) # The standard deviation seems to really level off at 10
# Recluster with the appropriate number of dimensions
cocl2todabtram <- FindNeighbors(cocl2todabtram, dims = 1:15)
Computing nearest neighbor graph
Computing SNN
cocl2todabtram <- FindClusters(cocl2todabtram, resolution = 0.5)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 1773
Number of edges: 55850
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8474
Number of communities: 10
Elapsed time: 0 seconds
cocl2todabtram <- RunUMAP(cocl2todabtram, dims = 1:15)
15:39:49 UMAP embedding parameters a = 0.9922 b = 1.112
15:39:49 Read 1773 rows and found 15 numeric columns
15:39:49 Using Annoy for neighbor search, n_neighbors = 30
15:39:49 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:39:50 Writing NN index file to temp file /var/folders/ph/24prrxys02179y9_qzhxjgvc0000gn/T//Rtmpx16qOy/filec5f83ea3e8f6
15:39:50 Searching Annoy index using 1 thread, search_k = 3000
15:39:50 Annoy recall = 100%
15:39:50 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
15:39:51 Initializing from normalized Laplacian + noise (using irlba)
15:39:51 Commencing optimization for 500 epochs, with 68614 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:39:54 Optimization finished
DimPlot(cocl2todabtram, reduction = 'umap', pt.size = 1)
# Get the scaled data from the cocl2todabtram object
cocl2todabtram_input_data <- GetAssayData(cocl2todabtram, assay = 'RNA', slot = 'scale.data')
# Build list of lineages with at least 5 cells in dab tram and do pearson correlation
cocl2todabtram_lin_pearson_list <- list()
for (i in fivecell_cDNA$CoCl2toDabTram){
temp_pearson <- cor(cocl2todabtram_input_data[,colnames(cocl2todabtram)[cocl2todabtram$Lineage == i]])
temp_pearson_filt <- temp_pearson[lower.tri(temp_pearson, diag = FALSE)]
cocl2todabtram_lin_pearson_list[[i]] <- temp_pearson_filt
}
# Need to do a random sampling of the same thing
cocl2todabtram_lin_pearson_rand_list <- list()
num_iter <- 100
for(j in 1:num_iter){
cocl2todabtram_lin_pearson_rand_list[[j]] <- list()
for (i in fivecell_cDNA$CoCl2toDabTram){
set.seed(j)
num_cells <- length(cocl2todabtram$Lineage[cocl2todabtram$Lineage == i])
temp_pearson <- cor(cocl2todabtram_input_data[,sample(colnames(cocl2todabtram), num_cells, replace = F)])
temp_pearson_filt <- temp_pearson[lower.tri(temp_pearson, diag = FALSE)]
cocl2todabtram_lin_pearson_rand_list[[j]][[i]] <- temp_pearson_filt
}
}
# Find the mean of the average pearson correlation per lineage
mean_pearson_cocl2todabtram <- mean(unlist(lapply(cocl2todabtram_lin_pearson_list, mean))) # True mean of average correlations per lineage
means_pearson_cocl2todabtram_sim <- sapply(1:length(cocl2todabtram_lin_pearson_rand_list), function (y)
mean(unlist(lapply(cocl2todabtram_lin_pearson_rand_list[[y]], mean)))) # list of mean of average correlations per lineage
z_mean_pearson_cocl2todabtram <- (mean_pearson_cocl2todabtram-mean(means_pearson_cocl2todabtram_sim))/sd(means_pearson_cocl2todabtram_sim) # Z score comparing mean to simulations
pval_mean_pearson_cocl2todabtram <- pnorm(z_mean_pearson_cocl2todabtram, mean(means_pearson_cocl2todabtram_sim), sd(means_pearson_cocl2todabtram_sim), lower.tail = F) # calculate p value from z score
# Find the weighted means of the average pearson correlations per lineage
weighted_mean_pearson_cocl2todabtram <- weighted.mean(unlist(lapply(cocl2todabtram_lin_pearson_list, mean)),
unlist(lapply(cocl2todabtram_lin_pearson_list, length))) # true weighted mean of average correlations per lineage
weighted_means_pearson_cocl2todabtram_sim <- sapply(1:length(cocl2todabtram_lin_pearson_rand_list), function(y)
weighted.mean(unlist(lapply(cocl2todabtram_lin_pearson_rand_list[[y]], mean)),
unlist(lapply(cocl2todabtram_lin_pearson_rand_list[[y]], length)))) # List of weighted means of pearson correlations
z_wmean_pearson_cocl2todabtram <- (weighted_mean_pearson_cocl2todabtram-mean(weighted_means_pearson_cocl2todabtram_sim))/sd(weighted_means_pearson_cocl2todabtram_sim) # Z score comparing mean to simulations
pval_wmean_pearson_cocl2todabtram <- pnorm(z_wmean_pearson_cocl2todabtram, mean(weighted_means_pearson_cocl2todabtram_sim), sd(weighted_means_pearson_cocl2todabtram_sim), lower.tail = F) # calculate p value from z score
# Compare each individual distribution of pearson correlations to the observed correlation by wilcoxon rank sum test and track pval
wilcox_pval_cocl2todabtram <- c()
for (i in 1:length(cocl2todabtram_lin_pearson_rand_list)){
sim_means <- unlist(lapply(cocl2todabtram_lin_pearson_rand_list[[i]], mean))
wilcox_pval_cocl2todabtram <- cbind(wilcox_pval_cocl2todabtram, wilcox.test(x = unlist(lapply(cocl2todabtram_lin_pearson_list, mean)),
y = sim_means, alternative = 'greater')$p.value)
}
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cocl2todabtram_lin_pearson_list, :
cannot compute exact p-value with ties
# Save outputs
save(cocl2todabtram, cocl2todabtram_lin_pearson_list, cocl2todabtram_lin_pearson_rand_list, z_mean_pearson_cocl2todabtram, pval_mean_pearson_cocl2todabtram, z_wmean_pearson_cocl2todabtram, pval_wmean_pearson_cocl2todabtram, wilcox_pval_cocl2todabtram, file = '2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/cocl2todabtram_pearson_sim_results.RData')
rm(cocl2todabtram, cocl2todabtram_lin_pearson_list, cocl2todabtram_lin_pearson_rand_list, cocl2todabtram_input_data)
Idents(all_data) <- all_data$OG_condition # Change the idents to the OG condition for subsetting to cis
cis <- subset(all_data, idents = 'cis') # Subset down to the cis object
cis <- NormalizeData(cis)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
cis <- FindVariableFeatures(cis, selection.method = 'vst', nFeatures = 20000)
Warning: The following arguments are not used: nFeatures
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
cis <- ScaleData(cis)
Centering and scaling data matrix
|
| | 0%
|
|============================================== | 50%
|
|===========================================================================================| 100%
cis <- RunPCA(cis)
PC_ 1
Positive: GAS5, CRYAB, LINC00882, TYRP1, SPP1, LINC00520, SPACA3, ISG15, AL078612.2, IFI6
NUPR1, KRTAP19-1, DHRS2, LINC02269, TGFB2, IFIT2, CCL18, PRR9, LINC00492, IFIT1
AC006967.3, SERPINB2, AC021148.2, CIR1, AC103993.1, LINC00595, TRPM1, AL031429.1, AC109635.7, IGFL2-AS1
Negative: NEAT1, FUS, MKI67, PRKDC, TPI1, HNRNPAB, SFPQ, POLR2J3, FASN, ANKRD11
PRRC2C, HNRNPU, KHDRBS1, LINC00511, NUMA1, SSRP1, CCNI, MARCKSL1, HSP90B1, KPNB1
TIMP3, SLC25A13, MTDH, HNRNPA1, FADS1, CCT6A, HNRNPA2B1, SRSF4, GAS7, DNMT1
PC_ 2
Positive: HSP90AA1, H2AFZ, PTMA, UBE2S, HMGB1, TPX2, MKI67, ACTG1, BIRC5, GAPDH
NUSAP1, ACTB, TK1, TUBB, LGALS1, TUBA1B, RANBP1, UBE2C, CKS1B, TOP2A
PTTG1, PRC1, RPS5, DBI, JPT1, NME1, TUBB4B, CFL1, ANP32E, HMGB2
Negative: MALAT1, GABPB1-AS1, NEAT1, CCDC144A, POLR2J3, AC008170.1, N4BP2L2, CADPS, SNHG14, MIR3142HG
ANKRD28, NKTR, TTN, KHDC4, AC058791.1, AUXG01000058.1, LINC-PINT, CBLB, RSRP1, CCNL1
HMGA2-AS1, PAXBP1, AC103691.1, STX16, FTX, SULT1C2, NHLRC3, WSB1, AC016831.5, LINC00488
PC_ 3
Positive: ASPM, MKI67, CENPF, TOP2A, TPX2, GTSE1, KIF14, CENPE, UBE2C, AURKA
NUSAP1, DEPDC1, NUF2, ANLN, PRC1, KIF2C, CCNB1, PLK1, BUB1B, CDCA8
SGO2, CENPA, DLGAP5, HMGB2, KIF11, CEP55, CCNA2, HMMR, KNL1, HJURP
Negative: RPS8, GAPDH, RPS3A, RPS5, RPL5, RPS15A, RPL12, RACK1, RPL7A, SCD
BNIP3, CCNI, PGK1, ERBB3, RGS10, FGFBP2, EEF2, BTF3, NRN1, MARCKSL1
RPL23, IGFBP5, MIA, TPI1, MYC, APOD, PNRC1, HIST3H2A, BTG1, ADGRG1
PC_ 4
Positive: HIST1H1B, HELLS, HIST1H1A, MCM4, DCT, BRCA1, GINS2, MCM10, CLSPN, FANCA
DTL, ATAD2, XRCC2, ATAD5, HIST1H1D, CHAF1A, C21orf58, HIST1H4C, MCM3, HIST1H1E
FRMD4B, BRCA2, EZH2, EXO1, CCNE2, MYBL2, FBXO5, LHFPL3-AS1, CCDC14, MCM2
Negative: FN1, SRGN, TMEM158, CAV1, ANXA1, STC1, ERRFI1, ARL4C, F2R, AHNAK
BASP1, SFRP1, IGFBP7, NGFR, VEGFA, LGALS1, JUN, CCND1, AC020916.1, FOSB
NDRG1, THBS1, AXL, LOXL2, ANXA2, MYOF, SERPINE2, TNFRSF12A, PRNP, SCG2
PC_ 5
Positive: MCM4, CLSPN, ATAD2, UHRF1, GINS2, DTL, MCM3, HELLS, MCM6, E2F1
PCNA, HIST1H1B, MCM5, TMEM158, UNG, FN1, MCM10, CCNE2, CDCA7, CDC6
CHAF1A, MCM2, F2R, CDC25A, FANCA, POLD3, HIST1H1A, FOSB, RBBP8, MSH6
Negative: PLK1, CENPE, ASPM, CCNB1, HMMR, KIF14, GTSE1, ARL6IP1, AURKA, DLGAP5
CENPA, SGO2, TPX2, CCNB2, CDC20, NUSAP1, PRR11, CKS2, KIF20A, DEPDC1
NEK2, FAM83D, NUF2, CENPF, LHFPL3-AS1, CDCA8, KDM5B, TNS1, KIF2C, TOP2A
ElbowPlot(cis) # The standard deviation seems to really level off at 10
# Recluster with the appropriate number of dimensions
cis <- FindNeighbors(cis, dims = 1:15)
Computing nearest neighbor graph
Computing SNN
cis <- FindClusters(cis, resolution = 0.5)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 8303
Number of edges: 243168
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8429
Number of communities: 9
Elapsed time: 0 seconds
cis <- RunUMAP(cis, dims = 1:15)
15:40:48 UMAP embedding parameters a = 0.9922 b = 1.112
15:40:48 Read 8303 rows and found 15 numeric columns
15:40:48 Using Annoy for neighbor search, n_neighbors = 30
15:40:48 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:40:49 Writing NN index file to temp file /var/folders/ph/24prrxys02179y9_qzhxjgvc0000gn/T//Rtmpx16qOy/filec5f87b138e9a
15:40:49 Searching Annoy index using 1 thread, search_k = 3000
15:40:51 Annoy recall = 100%
15:40:51 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
15:40:52 Initializing from normalized Laplacian + noise (using irlba)
15:40:52 Commencing optimization for 500 epochs, with 370900 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:41:04 Optimization finished
DimPlot(cis, reduction = 'umap', pt.size = 1)
# Get the scaled data from the cis object
cis_input_data <- GetAssayData(cis, assay = 'RNA', slot = 'scale.data')
# Build list of lineages with at least 5 cells in dab tram and do pearson correlation
cis_lin_pearson_list <- list()
for (i in fivecell_cDNA$Cis){
temp_pearson <- cor(cis_input_data[,colnames(cis)[cis$Lineage == i]])
temp_pearson_filt <- temp_pearson[lower.tri(temp_pearson, diag = FALSE)]
cis_lin_pearson_list[[i]] <- temp_pearson_filt
}
# Need to do a random sampling of the same thing
cis_lin_pearson_rand_list <- list()
num_iter <- 100
for(j in 1:num_iter){
cis_lin_pearson_rand_list[[j]] <- list()
for (i in fivecell_cDNA$Cis){
set.seed(j)
num_cells <- length(cis$Lineage[cis$Lineage == i])
temp_pearson <- cor(cis_input_data[,sample(colnames(cis), num_cells, replace = F)])
temp_pearson_filt <- temp_pearson[lower.tri(temp_pearson, diag = FALSE)]
cis_lin_pearson_rand_list[[j]][[i]] <- temp_pearson_filt
}
}
# Find the mean of the average pearson correlation per lineage
mean_pearson_cis <- mean(unlist(lapply(cis_lin_pearson_list, mean))) # True mean of average correlations per lineage
means_pearson_cis_sim <- sapply(1:length(cis_lin_pearson_rand_list), function (y)
mean(unlist(lapply(cis_lin_pearson_rand_list[[y]], mean)))) # list of mean of average correlations per lineage
z_mean_pearson_cis <- (mean_pearson_cis-mean(means_pearson_cis_sim))/sd(means_pearson_cis_sim) # Z score comparing mean to simulations
pval_mean_pearson_cis <- pnorm(z_mean_pearson_cis, mean(means_pearson_cis_sim), sd(means_pearson_cis_sim), lower.tail = F) # calculate p value from z score
# Find the weighted means of the average pearson correlations per lineage
weighted_mean_pearson_cis <- weighted.mean(unlist(lapply(cis_lin_pearson_list, mean)),
unlist(lapply(cis_lin_pearson_list, length))) # true weighted mean of average correlations per lineage
weighted_means_pearson_cis_sim <- sapply(1:length(cis_lin_pearson_rand_list), function(y)
weighted.mean(unlist(lapply(cis_lin_pearson_rand_list[[y]], mean)),
unlist(lapply(cis_lin_pearson_rand_list[[y]], length)))) # List of weighted means of pearson correlations
z_wmean_pearson_cis <- (weighted_mean_pearson_cis-mean(weighted_means_pearson_cis_sim))/sd(weighted_means_pearson_cis_sim) # Z score comparing mean to simulations
pval_wmean_pearson_cis <- pnorm(z_wmean_pearson_cis, mean(weighted_means_pearson_cis_sim), sd(weighted_means_pearson_cis_sim), lower.tail = F) # calculate p value from z score
# Compare each individual distribution of pearson correlations to the observed correlation by wilcoxon rank sum test and track pval
wilcox_pval_cis <- c()
for (i in 1:length(cis_lin_pearson_rand_list)){
sim_means <- unlist(lapply(cis_lin_pearson_rand_list[[i]], mean))
wilcox_pval_cis <- cbind(wilcox_pval_cis, wilcox.test(x = unlist(lapply(cis_lin_pearson_list, mean)),
y = sim_means, alternative = 'greater')$p.value)
}
# Save outputs
save(cis, cis_lin_pearson_list, cis_lin_pearson_rand_list, z_mean_pearson_cis, pval_mean_pearson_cis, z_wmean_pearson_cis, pval_wmean_pearson_cis, wilcox_pval_cis, file = '2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/cis_pearson_sim_results.RData')
rm(cis, cis_lin_pearson_list, cis_lin_pearson_rand_list, cis_input_data)
Idents(all_data) <- all_data$OG_condition # Change the idents to the OG condition for subsetting to cistocis
cistocis <- subset(all_data, idents = 'cistocis') # Subset down to the cistocis object
cistocis <- NormalizeData(cistocis)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
cistocis <- FindVariableFeatures(cistocis, selection.method = 'vst', nFeatures = 20000)
Warning: The following arguments are not used: nFeatures
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
cistocis <- ScaleData(cistocis)
Centering and scaling data matrix
|
| | 0%
|
|============================================== | 50%
|
|===========================================================================================| 100%
cistocis <- RunPCA(cistocis)
PC_ 1
Positive: MKI67, TPX2, CENPF, UBE2S, HMGB2, TUBB, TOP2A, TUBA1B, ASPM, ANLN
NUSAP1, PRC1, HMGB1, UBE2C, GTSE1, TMPO, BIRC5, TUBB4B, H2AFZ, AURKA
DTYMK, SMC4, AURKB, KPNA2, RRM2, KIF11, CENPE, CCNB1, HIST1H4C, CEP55
Negative: SAT1, GAS5, PNRC1, CBLB, SNHG14, GABPB1-AS1, HIST3H2A, MALAT1, PMEL, RPS8
BTG1, CREBRF, CCDC18-AS1, TRMT9B, A2M, NEAT1, LINC00492, APOE, SLC5A3, TNS1
SGCD, CD48, LINC01531, RACK1, PCDH7, RPL7A, LHFPL3-AS1, CADPS, NUPR1, CTSK
PC_ 2
Positive: MLANA, PMEL, LGALS3, CHCHD6, LHFPL3-AS1, PRDX1, CD63, GSTO1, MIF, FRMD4B
AKR1A1, ATOX1, DCT, H2AFZ, SCD, CAPG, MITF, ADGRG1, TSPAN10, MIA
TIMM50, RLBP1, QPCT, CAPN3, RHOQ, BCAS3, FXYD3, METTL9, SIRPA, FABP5
Negative: FN1, IGFBP7, MYOF, THBS1, TMEM158, F2R, CAV1, BASP1, PRNP, SFRP1
ANXA2, SCG2, DKK1, AHNAK, ANXA1, SERPINE2, MMP2, LMO7, COL1A1, IL6ST
CALD1, CRIM1, ARL4C, VCL, DPYSL2, COL6A2, SORBS2, MMP1, AXL, ITGA2
PC_ 3
Positive: MALAT1, NEAT1, TTN, PLAC4, ASPM, CCDC144A, LINC00488, AC058791.1, HMGA2-AS1, CLCN7
SYNE2, CENPF, AC008170.1, PRDM7, AUXG01000058.1, MIR3142HG, ZFYVE16, SNHG14, ANKRD11, DST
MKI67, RSRP1, NABP1, KIAA2026, NAV2, CBLB, AC016831.1, AC003681.1, KIF14, SMCR5
Negative: MIF, TMSB10, FTL, FTH1, UQCRH, DBI, SH3BGRL3, PSMA7, LGALS1, S100A6
ATP5MC3, RPS8, SERF2, RPL8, ATP5PF, NDUFC2, RPS6, TXN, MT2A, GYPC
S100A11, AP2S1, TMSB4X, COX7A2, NDUFB2, LGALS3, ATOX1, S100A13, ACTB, POMP
PC_ 4
Positive: MCM4, MCM3, MCM6, HELLS, ATAD2, DTL, PCNA, BRCA1, CDC6, UHRF1
CLSPN, MCM5, GINS2, MSH6, UNG, CHAF1A, E2F1, CCNE2, POLD3, MCM7
MCM2, MCM10, HIST1H1B, FEN1, XRCC2, EXO1, CDCA7, WDR76, HIST1H1D, FANCA
Negative: CCNB1, AURKA, PLK1, HMMR, ARL6IP1, CENPE, CDC20, DLGAP5, KIF14, CENPA
PTTG1, TPX2, NEK2, GTSE1, ASPM, CCNB2, CDKN3, CKS2, PRR11, DEPDC1
KIF20A, UBE2C, UBE2S, KIF2C, BUB1, TUBB4B, NUF2, CEP55, BIRC5, CDCA8
PC_ 5
Positive: TMEM158, SLC20A1, STC1, IGFN1, SCG2, MAGI1, SFRP1, NEAT1, VEGFA, RAB27B
COL6A5, NRP1, TFPI2, E2F7, MMP1, PRDM7, SRGN, ITGA3, IER3, SPOCK1
COL1A1, NPAS2, BASP1, ARSG, DCBLD2, XRCC2, BIRC7, LINC00488, LHFPL3-AS1, AC008170.1
Negative: PMP22, S100B, ANXA2, SPARC, SORBS2, PLP1, NIBAN1, PALLD, MARCKS, EPB41L3
AHNAK, TFAP2A, ALCAM, ADAM23, HSPG2, OLFML2A, NTRK2, IL17D, ESRP1, RPS8
SNAI2, CALD1, GAS7, PMP2, RHOBTB3, SLITRK6, MAP2, ZEB2, PYCARD, DYNLL1
ElbowPlot(cistocis) # The standard deviation seems to really level off at 10
# Recluster with the appropriate number of dimensions
cistocis <- FindNeighbors(cistocis, dims = 1:15)
Computing nearest neighbor graph
Computing SNN
cistocis <- FindClusters(cistocis, resolution = 0.5)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 3323
Number of edges: 112104
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8576
Number of communities: 11
Elapsed time: 0 seconds
cistocis <- RunUMAP(cistocis, dims = 1:15)
15:42:25 UMAP embedding parameters a = 0.9922 b = 1.112
15:42:25 Read 3323 rows and found 15 numeric columns
15:42:25 Using Annoy for neighbor search, n_neighbors = 30
15:42:25 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:42:25 Writing NN index file to temp file /var/folders/ph/24prrxys02179y9_qzhxjgvc0000gn/T//Rtmpx16qOy/filec5f840f75f96
15:42:25 Searching Annoy index using 1 thread, search_k = 3000
15:42:26 Annoy recall = 100%
15:42:26 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
15:42:27 Initializing from normalized Laplacian + noise (using irlba)
15:42:27 Commencing optimization for 500 epochs, with 131628 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:42:32 Optimization finished
DimPlot(cistocis, reduction = 'umap', pt.size = 1)
# Get the scaled data from the cistocis object
cistocis_input_data <- GetAssayData(cistocis, assay = 'RNA', slot = 'scale.data')
# Build list of lineages with at least 5 cells in dab tram and do pearson correlation
cistocis_lin_pearson_list <- list()
for (i in fivecell_cDNA$CistoCis){
temp_pearson <- cor(cistocis_input_data[,colnames(cistocis)[cistocis$Lineage == i]])
temp_pearson_filt <- temp_pearson[lower.tri(temp_pearson, diag = FALSE)]
cistocis_lin_pearson_list[[i]] <- temp_pearson_filt
}
# Need to do a random sampling of the same thing
cistocis_lin_pearson_rand_list <- list()
num_iter <- 100
for(j in 1:num_iter){
cistocis_lin_pearson_rand_list[[j]] <- list()
for (i in fivecell_cDNA$CistoCis){
set.seed(j)
num_cells <- length(cistocis$Lineage[cistocis$Lineage == i])
temp_pearson <- cor(cistocis_input_data[,sample(colnames(cistocis), num_cells, replace = F)])
temp_pearson_filt <- temp_pearson[lower.tri(temp_pearson, diag = FALSE)]
cistocis_lin_pearson_rand_list[[j]][[i]] <- temp_pearson_filt
}
}
# Find the mean of the average pearson correlation per lineage
mean_pearson_cistocis <- mean(unlist(lapply(cistocis_lin_pearson_list, mean))) # True mean of average correlations per lineage
means_pearson_cistocis_sim <- sapply(1:length(cistocis_lin_pearson_rand_list), function (y)
mean(unlist(lapply(cistocis_lin_pearson_rand_list[[y]], mean)))) # list of mean of average correlations per lineage
z_mean_pearson_cistocis <- (mean_pearson_cistocis-mean(means_pearson_cistocis_sim))/sd(means_pearson_cistocis_sim) # Z score comparing mean to simulations
pval_mean_pearson_cistocis <- pnorm(z_mean_pearson_cistocis, mean(means_pearson_cistocis_sim), sd(means_pearson_cistocis_sim), lower.tail = F) # calculate p value from z score
# Find the weighted means of the average pearson correlations per lineage
weighted_mean_pearson_cistocis <- weighted.mean(unlist(lapply(cistocis_lin_pearson_list, mean)),
unlist(lapply(cistocis_lin_pearson_list, length))) # true weighted mean of average correlations per lineage
weighted_means_pearson_cistocis_sim <- sapply(1:length(cistocis_lin_pearson_rand_list), function(y)
weighted.mean(unlist(lapply(cistocis_lin_pearson_rand_list[[y]], mean)),
unlist(lapply(cistocis_lin_pearson_rand_list[[y]], length)))) # List of weighted means of pearson correlations
z_wmean_pearson_cistocis <- (weighted_mean_pearson_cistocis-mean(weighted_means_pearson_cistocis_sim))/sd(weighted_means_pearson_cistocis_sim) # Z score comparing mean to simulations
pval_wmean_pearson_cistocis <- pnorm(z_wmean_pearson_cistocis, mean(weighted_means_pearson_cistocis_sim), sd(weighted_means_pearson_cistocis_sim), lower.tail = F) # calculate p value from z score
# Compare each individual distribution of pearson correlations to the observed correlation by wilcoxon rank sum test and track pval
wilcox_pval_cistocis <- c()
for (i in 1:length(cistocis_lin_pearson_rand_list)){
sim_means <- unlist(lapply(cistocis_lin_pearson_rand_list[[i]], mean))
wilcox_pval_cistocis <- cbind(wilcox_pval_cistocis, wilcox.test(x = unlist(lapply(cistocis_lin_pearson_list, mean)),
y = sim_means, alternative = 'greater')$p.value)
}
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistocis_lin_pearson_list, :
cannot compute exact p-value with ties
# Save outputs
save(cistocis, cistocis_lin_pearson_list, cistocis_lin_pearson_rand_list, z_mean_pearson_cistocis, pval_mean_pearson_cistocis, z_wmean_pearson_cistocis, pval_wmean_pearson_cistocis, wilcox_pval_cistocis, file = '2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/cistocis_pearson_sim_results.RData')
rm(cistocis, cistocis_lin_pearson_list, cistocis_lin_pearson_rand_list, cistocis_input_data)
Idents(all_data) <- all_data$OG_condition # Change the idents to the OG condition for subsetting to cistodabtram
cistodabtram <- subset(all_data, idents = 'cistodabtram') # Subset down to the cistodabtram object
cistodabtram <- NormalizeData(cistodabtram)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
cistodabtram <- FindVariableFeatures(cistodabtram, selection.method = 'vst', nFeatures = 20000)
Warning: The following arguments are not used: nFeatures
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
cistodabtram <- ScaleData(cistodabtram)
Centering and scaling data matrix
|
| | 0%
|
|============================================== | 50%
|
|===========================================================================================| 100%
cistodabtram <- RunPCA(cistodabtram)
PC_ 1
Positive: BASP1, CAV1, TUBB, MKI67, ANLN, PKM, CALR, DTYMK, TK1, UBE2S
TMPO, COL1A1, COTL1, SMC4, TPX2, TPM1, HIST1H4C, BIRC5, C12orf75, HSPD1
PCLAF, PRC1, NUSAP1, GTSE1, H2AFZ, TUBA1C, MYH9, CENPF, NXN, TUBA1B
Negative: PHACTR1, CRYAB, MLANA, SOX10, S100B, AKAP12, PMEL, ZNF704, PLP1, GPM6B
GAS7, SOX6, COL9A3, CITED1, CADPS, CBLB, ERBB3, SOX5, MXI1, AL139383.1
ID4, CA8, AC110285.1, TTN, TSC22D1, NFATC2, TFAP2A, IRS2, STARD4-AS1, AC068587.4
PC_ 2
Positive: MKI67, CENPF, ANLN, TPX2, NUSAP1, GTSE1, ASPM, CEP55, PRC1, UBE2C
TOP2A, RRM2, STMN1, NCAPG, CENPE, S100B, BIRC5, KIF2C, CCNB1, CRYAB
HMMR, KNL1, PTTG1, KIF11, ITGA6, AURKB, MAD2L1, AURKA, CDCA2, GAS7
Negative: COL14A1, ITGB8, COL1A1, SLC12A8, COL12A1, IGFBP5, C1R, LINC00968, FTL, COL15A1
PDE5A, COL6A1, VCAM1, C1S, TXNRD1, IL6ST, CAMK2N1, SPON2, IGFBP7, DAB2
TMEM47, DEPTOR, SLC7A11, NUPR1, ITGA11, COL6A2, MSC, BGN, ADD3, NXN
PC_ 3
Positive: TTN, AUXG01000058.1, CENPF, CCNB1, AC058791.1, HMGA2-AS1, BIRC5, CENPE, EREG, CEP55
CDC20, AURKA, AP000462.2, CDCA8, TPX2, DEPDC1, CENPA, AC114760.2, TOP2A, DLGAP5
MKI67, ANLN, PTTG1, PRC1, RRM2, UBE2C, ASPM, PCLAF, NEK2, CDKN3
Negative: HSPG2, EVI5, SPARC, IFI6, PALLD, OLFML2A, GAS7, COL9A3, PMP22, MFGE8
NFATC2, ANXA2, CTSD, GREM2, MMP2, AEBP1, ITGA2, DAG1, TUBA1A, CSRP2
FN1, MCAM, LIMCH1, SOX4, FAM89A, P4HA1, ARID5B, CD59, GPM6B, ITGA6
PC_ 4
Positive: CPA4, ITGA3, FRMD4A, OXTR, DKK1, SCG5, SERPINE1, ARL4C, AC092807.3, KRT34
AXL, LMO7, SMYD3, SERINC2, CRISPLD2, TNIK, HMGA2, TPM1, LINC01638, PLPP4
TIMP3, GRAMD2B, VEGFC, MAGI1, PHLDB2, BDNF, PRSS23, TNC, SRGN, RPS27L
Negative: VCAM1, IGFBP5, GCLM, COL14A1, SLC7A11, FTL, C1R, CEBPD, TMEM47, ASPH
PGD, PITX1, EPS8, C1S, SRXN1, SOX4, PKDCC, TRIM16L, OSGIN1, HSD17B2
RHOBTB3, FOXF1, SQSTM1, DCN, ITGB8, LINC01914, AKR1B10, LINC00968, ITGA4, LSAMP
PC_ 5
Positive: NEAT1, GLS, HMGA2-AS1, AUXG01000058.1, AC083870.1, CCDC14, AC058791.1, TTN, STARD4-AS1, AC012349.1
PZP, NABP1, AC114760.2, AP000462.2, CALD1, SORBS2, PSD3, HELLS, FANCA, IGFN1
XRCC2, BRCA1, C21orf58, AC016831.1, MAGI1, ATAD5, THBS1, COL1A1, FN1, BRCA2
Negative: PRDX1, LGALS3, CSTB, TMSB10, SH3BGRL3, FTL, S100A11, TMSB4X, MLANA, S100A10
CAPG, FKBP1A, MT2A, S100A6, CAV1, PTTG1, CCDC85B, NME1, CITED1, EZR
PKM, TRIM63, JPT1, AP1S2, APOE, TXN, HSPD1, H2AFZ, DSTN, SFRP1
ElbowPlot(cistodabtram) # The standard deviation seems to really level off at 10
# Recluster with the appropriate number of dimensions
cistodabtram <- FindNeighbors(cistodabtram, dims = 1:15)
Computing nearest neighbor graph
Computing SNN
cistodabtram <- FindClusters(cistodabtram, resolution = 0.5)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 2615
Number of edges: 89129
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8682
Number of communities: 9
Elapsed time: 0 seconds
cistodabtram <- RunUMAP(cistodabtram, dims = 1:15)
15:43:27 UMAP embedding parameters a = 0.9922 b = 1.112
15:43:27 Read 2615 rows and found 15 numeric columns
15:43:27 Using Annoy for neighbor search, n_neighbors = 30
15:43:27 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:43:27 Writing NN index file to temp file /var/folders/ph/24prrxys02179y9_qzhxjgvc0000gn/T//Rtmpx16qOy/filec5f830a4922c
15:43:27 Searching Annoy index using 1 thread, search_k = 3000
15:43:28 Annoy recall = 100%
15:43:28 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
15:43:29 Initializing from normalized Laplacian + noise (using irlba)
15:43:29 Commencing optimization for 500 epochs, with 105536 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:43:32 Optimization finished
DimPlot(cistodabtram, reduction = 'umap', pt.size = 1)
# Get the scaled data from the cistodabtram object
cistodabtram_input_data <- GetAssayData(cistodabtram, assay = 'RNA', slot = 'scale.data')
# Build list of lineages with at least 5 cells in dab tram and do pearson correlation
cistodabtram_lin_pearson_list <- list()
for (i in fivecell_cDNA$CistoDabTram){
temp_pearson <- cor(cistodabtram_input_data[,colnames(cistodabtram)[cistodabtram$Lineage == i]])
temp_pearson_filt <- temp_pearson[lower.tri(temp_pearson, diag = FALSE)]
cistodabtram_lin_pearson_list[[i]] <- temp_pearson_filt
}
# Need to do a random sampling of the same thing
cistodabtram_lin_pearson_rand_list <- list()
num_iter <- 100
for(j in 1:num_iter){
cistodabtram_lin_pearson_rand_list[[j]] <- list()
for (i in fivecell_cDNA$CistoDabTram){
set.seed(j)
num_cells <- length(cistodabtram$Lineage[cistodabtram$Lineage == i])
temp_pearson <- cor(cistodabtram_input_data[,sample(colnames(cistodabtram), num_cells, replace = F)])
temp_pearson_filt <- temp_pearson[lower.tri(temp_pearson, diag = FALSE)]
cistodabtram_lin_pearson_rand_list[[j]][[i]] <- temp_pearson_filt
}
}
# Find the mean of the average pearson correlation per lineage
mean_pearson_cistodabtram <- mean(unlist(lapply(cistodabtram_lin_pearson_list, mean))) # True mean of average correlations per lineage
means_pearson_cistodabtram_sim <- sapply(1:length(cistodabtram_lin_pearson_rand_list), function (y)
mean(unlist(lapply(cistodabtram_lin_pearson_rand_list[[y]], mean)))) # list of mean of average correlations per lineage
z_mean_pearson_cistodabtram <- (mean_pearson_cistodabtram-mean(means_pearson_cistodabtram_sim))/sd(means_pearson_cistodabtram_sim) # Z score comparing mean to simulations
pval_mean_pearson_cistodabtram <- pnorm(z_mean_pearson_cistodabtram, mean(means_pearson_cistodabtram_sim), sd(means_pearson_cistodabtram_sim), lower.tail = F) # calculate p value from z score
# Find the weighted means of the average pearson correlations per lineage
weighted_mean_pearson_cistodabtram <- weighted.mean(unlist(lapply(cistodabtram_lin_pearson_list, mean)),
unlist(lapply(cistodabtram_lin_pearson_list, length))) # true weighted mean of average correlations per lineage
weighted_means_pearson_cistodabtram_sim <- sapply(1:length(cistodabtram_lin_pearson_rand_list), function(y)
weighted.mean(unlist(lapply(cistodabtram_lin_pearson_rand_list[[y]], mean)),
unlist(lapply(cistodabtram_lin_pearson_rand_list[[y]], length)))) # List of weighted means of pearson correlations
z_wmean_pearson_cistodabtram <- (weighted_mean_pearson_cistodabtram-mean(weighted_means_pearson_cistodabtram_sim))/sd(weighted_means_pearson_cistodabtram_sim) # Z score comparing mean to simulations
pval_wmean_pearson_cistodabtram <- pnorm(z_wmean_pearson_cistodabtram, mean(weighted_means_pearson_cistodabtram_sim), sd(weighted_means_pearson_cistodabtram_sim), lower.tail = F) # calculate p value from z score
# Compare each individual distribution of pearson correlations to the observed correlation by wilcoxon rank sum test and track pval
wilcox_pval_cistodabtram <- c()
for (i in 1:length(cistodabtram_lin_pearson_rand_list)){
sim_means <- unlist(lapply(cistodabtram_lin_pearson_rand_list[[i]], mean))
wilcox_pval_cistodabtram <- cbind(wilcox_pval_cistodabtram, wilcox.test(x = unlist(lapply(cistodabtram_lin_pearson_list, mean)),
y = sim_means, alternative = 'greater')$p.value)
}
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
Warning in wilcox.test.default(x = unlist(lapply(cistodabtram_lin_pearson_list, :
cannot compute exact p-value with ties
# Save outputs
save(cistodabtram, cistodabtram_lin_pearson_list, cistodabtram_lin_pearson_rand_list, z_mean_pearson_cistodabtram, pval_mean_pearson_cistodabtram, z_wmean_pearson_cistodabtram, pval_wmean_pearson_cistodabtram, wilcox_pval_cistodabtram, file = '2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/cistodabtram_pearson_sim_results.RData')
rm(cistodabtram, cistodabtram_lin_pearson_list, cistodabtram_lin_pearson_rand_list, cistodabtram_input_data)
Idents(all_data) <- all_data$OG_condition # Change the idents to the OG condition for subsetting to cistococl2
cistococl2 <- subset(all_data, idents = 'cistococl2') # Subset down to the cistococl2 object
cistococl2 <- NormalizeData(cistococl2)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
cistococl2 <- FindVariableFeatures(cistococl2, selection.method = 'vst', nFeatures = 20000)
Warning: The following arguments are not used: nFeatures
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
cistococl2 <- ScaleData(cistococl2)
Centering and scaling data matrix
|
| | 0%
|
|============================================== | 50%
|
|===========================================================================================| 100%
cistococl2 <- RunPCA(cistococl2)
PC_ 1
Positive: MALAT1, HMGA2-AS1, TTN, CSTB, AC058791.1, LINC00488, PLAC4, SNHG14, MIR3142HG, AC008170.1
NEAT1, AC008771.1, IGFL2-AS1, HSPA1B, TM4SF19-AS1, PRR26, NABP1, HIF1A-AS3, SAT1, HSPA1A
LINC01705, AL356599.1, AL121772.3, AL162426.1, MIR22HG, AL022323.4, ARSG, UCKL1-AS1, CLEC2D, KCNC1
Negative: CFL1, PRKDC, PKM, RPS8, ACTB, CAV1, SFRP1, TIMP3, KDELR1, TPI1
CALR, HSP90B1, YWHAZ, HIST1H4C, MKI67, HNRNPD, HNRNPAB, MTDH, ITGB1, AP2S1
CCNI, EEF2, HIST1H1B, RCN1, NCL, PA2G4, CCT6A, ACTN1, LDHB, MCM4
PC_ 2
Positive: PMEL, MLANA, MT-ND4, H2AFZ, MT-CYB, MITF, MYH10, DCT, MT-ND1, ATOX1
CHCHD6, S100B, GPM6B, CAPG, EEF1A1, MT-ND2, SMS, MKI67, LDHB, MT-ND3
NBL1, PIK3R3, RPS8, RPS15A, HMGB1, MIA, RPL28, FRMD4B, PARP1, FABP5
Negative: MT2A, MALAT1, TMEM158, SFRP1, FN1, SCG2, IER3, TFPI2, IL6ST, EREG
VEGFA, BASP1, JUN, ITGA2, PHLDA1, PPP1R15A, ARL4C, IGFBP7, LUCAT1, DKK1
SRGN, SIRPB1, TNFRSF12A, SERPINB2, ITGA3, STC1, GNG11, COL1A1, HIF1A-AS3, PLAUR
PC_ 3
Positive: FTL, FTH1, TMSB10, LGALS1, RPL28, S100A6, MT2A, TMSB4X, RPS15A, RPS8
EEF1A1, SH3BGRL3, PRDX1, RACK1, NQO1, NDUFA4, CSTB, ACTG1, ATOX1, CFL1
RAB13, COX8A, UQCRH, ACTB, GSTP1, H2AFZ, CCND1, GAS5, EEF1D, RPL12
Negative: NEAT1, MALAT1, ANKRD11, ZFYVE16, GOLGA4, GABPB1-AS1, HMGA2-AS1, FRMD4B, SNHG14, MIR3142HG
AC058791.1, NFKBIZ, LINC02249, PLAC4, VMP1, ARSG, AC016831.1, AC068587.4, CADPS, SLC5A3
SLC20A1, CBLB, AKAP12, NABP1, AC008170.1, PIK3R3, FRMD4A, TRIM25, AC008771.1, AF117829.1
PC_ 4
Positive: SFRP1, COL1A1, CXCL12, COL6A2, COL6A1, WNT5A, IGFBP7, SRGN, MT-ND4, CCN4
SCG2, MT-CYB, FN1, EREG, MT-ND1, LTBP1, TMEM158, NRG1, MT-ND3, NRP1
S100A6, TIMP3, MT-ND2, IGFBP5, AQP1, SIRPB1, SPOCK1, LYPD6, ITGA11, SNAP25
Negative: GADD45B, PPP1R15A, DDIT3, IER2, SNHG7, H3F3B, HSPA1A, EIF5, HSPH1, KBTBD8
HSPA1B, SERTAD1, ZFAND2A, BRD2, ATF4, DNAJB9, AL118516.1, ODC1, ATF3, AC003092.1
PDRG1, LRIF1, SNHG12, BUD31, LINC00520, SNHG15, DNAJB1, OSER1, ATP6V0B, IL24
PC_ 5
Positive: RPS8, SERPINE2, PMP22, LY6E, PLP1, SPARC, RAMP1, GPM6B, MIA, VKORC1
SORBS2, CCNI, CEBPD, CST3, SEZ6L2, MFSD12, CADM1, PRSS35, KDELR1, TFAP2A
AHNAK, RPL28, MARCKS, GAS7, SLC5A3, CDH19, CBLB, CADPS, SLC44A1, MARCKSL1
Negative: MKI67, CENPF, ASPM, TOP2A, UBE2C, NUSAP1, ANLN, GTSE1, TPX2, HMGB2
RRM2, CEP55, AURKA, PRC1, AURKB, CENPE, KIF14, HIST1H1B, SMC4, NCAPG
SGO2, KIF23, HJURP, KNL1, CCNB1, ATAD2, DEPDC1, KIF11, CDK1, BIRC5
ElbowPlot(cistococl2) # The standard deviation seems to really level off at 10
# Recluster with the appropriate number of dimensions
cistococl2 <- FindNeighbors(cistococl2, dims = 1:15)
Computing nearest neighbor graph
Computing SNN
cistococl2 <- FindClusters(cistococl2, resolution = 0.5)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 5715
Number of edges: 198390
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8451
Number of communities: 6
Elapsed time: 0 seconds
cistococl2 <- RunUMAP(cistococl2, dims = 1:15)
15:46:18 UMAP embedding parameters a = 0.9922 b = 1.112
15:46:18 Read 5715 rows and found 15 numeric columns
15:46:18 Using Annoy for neighbor search, n_neighbors = 30
15:46:18 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:46:19 Writing NN index file to temp file /var/folders/ph/24prrxys02179y9_qzhxjgvc0000gn/T//Rtmpx16qOy/filec5f84789fa7
15:46:19 Searching Annoy index using 1 thread, search_k = 3000
15:46:20 Annoy recall = 100%
15:46:20 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
15:46:21 Initializing from normalized Laplacian + noise (using irlba)
15:46:21 Commencing optimization for 500 epochs, with 238026 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:46:29 Optimization finished
DimPlot(cistococl2, reduction = 'umap', pt.size = 1)
# Get the scaled data from the cistococl2 object
cistococl2_input_data <- GetAssayData(cistococl2, assay = 'RNA', slot = 'scale.data')
# Build list of lineages with at least 5 cells in dab tram and do pearson correlation
cistococl2_lin_pearson_list <- list()
for (i in fivecell_cDNA$CistoCoCl2){
temp_pearson <- cor(cistococl2_input_data[,colnames(cistococl2)[cistococl2$Lineage == i]])
temp_pearson_filt <- temp_pearson[lower.tri(temp_pearson, diag = FALSE)]
cistococl2_lin_pearson_list[[i]] <- temp_pearson_filt
}
# Need to do a random sampling of the same thing
cistococl2_lin_pearson_rand_list <- list()
num_iter <- 100
for(j in 1:num_iter){
cistococl2_lin_pearson_rand_list[[j]] <- list()
for (i in fivecell_cDNA$CistoCoCl2){
set.seed(j)
num_cells <- length(cistococl2$Lineage[cistococl2$Lineage == i])
temp_pearson <- cor(cistococl2_input_data[,sample(colnames(cistococl2), num_cells, replace = F)])
temp_pearson_filt <- temp_pearson[lower.tri(temp_pearson, diag = FALSE)]
cistococl2_lin_pearson_rand_list[[j]][[i]] <- temp_pearson_filt
}
}
dir.create('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/pearson_hist_plots')
# DabTram first ----
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/dabtram_pearson_sim_results.RData')
wilcox_pvals_df <- data.frame("DabTram" = as.numeric(wilcox_pval_dabtram))
weighted_mean_pearson_dabtram <- weighted.mean(unlist(lapply(dabtram_lin_pearson_list, mean)),
unlist(lapply(dabtram_lin_pearson_list, length))) # true weighted mean of average correlations per lineage
weighted_means_pearson_dabtram_sim <- sapply(1:length(dabtram_lin_pearson_rand_list), function(y)
weighted.mean(unlist(lapply(dabtram_lin_pearson_rand_list[[y]], mean)),
unlist(lapply(dabtram_lin_pearson_rand_list[[y]], length)))) # List of weighted means of pearson correlations
pdf('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/pearson_hist_plots/dabtram.pdf')
#Summary of all simulations
ggplot(data = as.data.frame(weighted_means_pearson_dabtram_sim), aes(x = weighted_means_pearson_dabtram_sim))+geom_histogram(fill = '#623594', color = 'black', binwidth = 0.005) + geom_vline(xintercept = weighted_mean_pearson_dabtram, linetype = 'dashed', color = 'red') + geom_text(aes(x = weighted_mean_pearson_dabtram+.03, label = round(weighted_mean_pearson_dabtram, 3), y = 50), color = 'red') + labs(y = 'Number of simulations', x = 'Average weighted mean of correlations of cells within a lineage across 100 simulations', title = 'DabTram simulation summary') + xlim(-.2, .2) + ylim(0,100) + geom_text(aes(x = weighted_mean_pearson_dabtram+.03, label = 'Observed', y = 70), color = 'red') + theme_classic()
# One example simulation
plot_df <- data.frame('Pearson' = c(unlist(lapply(dabtram_lin_pearson_list, mean)),unlist(lapply(dabtram_lin_pearson_rand_list[[1]], mean))), 'Condition' = c(rep('Data',length(dabtram_lin_pearson_list)),rep('Simulation', length(dabtram_lin_pearson_rand_list[[1]]))))
ggplot(plot_df, aes(x = Pearson, fill = Condition, color = Condition)) + geom_histogram(position="identity", alpha = 0.5, binwidth = 0.005) + xlim(-.2, .5) + ylim(0,100) + scale_color_manual(values=c("#623594","#999999")) + scale_fill_manual(values=c("#623594","#999999")) + labs(y = 'Number of lineages', x = 'Mean of pearson correlations per lineage', title = 'Example DabTram plot')
dev.off()
rm(dabtram,dabtram_lin_pearson_list,dabtram_lin_pearson_rand_list, wilcox_pval_dabtram, plot_df)
# dabtramtodabtram ----
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/dabtramtodabtram_pearson_sim_results.RData')
wilcox_pvals_df$DabTramtoDabTram <- as.numeric(wilcox_pval_dabtramtodabtram)
weighted_mean_pearson_dabtramtodabtramtodabtramtodabtram <- weighted.mean(unlist(lapply(dabtramtodabtram_lin_pearson_list, mean)),
unlist(lapply(dabtramtodabtram_lin_pearson_list, length))) # true weighted mean of average correlations per lineage
weighted_means_pearson_dabtramtodabtram_sim <- sapply(1:length(dabtramtodabtram_lin_pearson_rand_list), function(y)
weighted.mean(unlist(lapply(dabtramtodabtram_lin_pearson_rand_list[[y]], mean)),
unlist(lapply(dabtramtodabtram_lin_pearson_rand_list[[y]], length)))) # List of weighted means of pearson correlations
pdf('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/pearson_hist_plots/dabtramtodabtram.pdf')
#Summary of all simulations
ggplot(data = as.data.frame(weighted_means_pearson_dabtramtodabtram_sim), aes(x = weighted_means_pearson_dabtramtodabtram_sim))+geom_histogram(fill = '#561e59', color = 'black', binwidth = 0.005) + geom_vline(xintercept = weighted_mean_pearson_dabtramtodabtram, linetype = 'dashed', color = 'red') + geom_text(aes(x = weighted_mean_pearson_dabtramtodabtram+.03, label = round(weighted_mean_pearson_dabtramtodabtram, 3), y = 50), color = 'red') + labs(y = 'Number of simulations', x = 'Average weighted mean of correlations of cells within a lineage across 100 simulations', title = 'DabTramtoDabTram simulation summary') + xlim(-.2, .2) + ylim(0,100) + geom_text(aes(x = weighted_mean_pearson_dabtramtodabtram+.03, label = 'Observed', y = 70), color = 'red') + theme_classic()
# One example simulation
plot_df <- data.frame('Pearson' = c(unlist(lapply(dabtramtodabtram_lin_pearson_list, mean)),unlist(lapply(dabtramtodabtram_lin_pearson_rand_list[[1]], mean))), 'Condition' = c(rep('Data',length(dabtramtodabtram_lin_pearson_list)),rep('Simulation', length(dabtramtodabtram_lin_pearson_rand_list[[1]]))))
ggplot(plot_df, aes(x = Pearson, fill = Condition, color = Condition)) + geom_histogram(position="identity", alpha = 0.5, binwidth = 0.005) + xlim(-.2, .5) + ylim(0,100) + scale_color_manual(values=c("#561e59","#999999")) + scale_fill_manual(values=c("#561e59","#999999")) + labs(y = 'Number of lineages', x = 'Mean of pearson correlations per lineage', title = 'Example DabTramtoDabTram plot')
dev.off()
rm(dabtramtodabtram,dabtramtodabtram_lin_pearson_list,dabtramtodabtram_lin_pearson_rand_list, wilcox_pval_dabtramtodabtram, plot_df)
# dabtramtocis ----
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/dabtramtocis_pearson_sim_results.RData')
wilcox_pvals_df$DabTramtoCis <- as.numeric(wilcox_pval_dabtramtocis)
weighted_mean_pearson_dabtramtocis <- weighted.mean(unlist(lapply(dabtramtocis_lin_pearson_list, mean)),
unlist(lapply(dabtramtocis_lin_pearson_list, length))) # true weighted mean of average correlations per lineage
weighted_means_pearson_dabtramtocis_sim <- sapply(1:length(dabtramtocis_lin_pearson_rand_list), function(y)
weighted.mean(unlist(lapply(dabtramtocis_lin_pearson_rand_list[[y]], mean)),
unlist(lapply(dabtramtocis_lin_pearson_rand_list[[y]], length)))) # List of weighted means of pearson correlations
pdf('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/pearson_hist_plots/dabtramtocis.pdf')
#Summary of all simulations
ggplot(data = as.data.frame(weighted_means_pearson_dabtramtocis_sim), aes(x = weighted_means_pearson_dabtramtocis_sim))+geom_histogram(fill = '#9685be', color = 'black', binwidth = 0.005) + geom_vline(xintercept = weighted_mean_pearson_dabtramtocis, linetype = 'dashed', color = 'red') + geom_text(aes(x = weighted_mean_pearson_dabtramtocis+.03, label = round(weighted_mean_pearson_dabtramtocis, 3), y = 50), color = 'red') + labs(y = 'Number of simulations', x = 'Average weighted mean of correlations of cells within a lineage across 100 simulations', title = 'DabTramtoCis simulation summary') + xlim(-.2, .2) + ylim(0,100) + geom_text(aes(x = weighted_mean_pearson_dabtramtocis+.03, label = 'Observed', y = 70), color = 'red') + theme_classic()
# One example simulation
plot_df <- data.frame('Pearson' = c(unlist(lapply(dabtramtocis_lin_pearson_list, mean)),unlist(lapply(dabtramtocis_lin_pearson_rand_list[[1]], mean))), 'Condition' = c(rep('Data',length(dabtramtocis_lin_pearson_list)),rep('Simulation', length(dabtramtocis_lin_pearson_rand_list[[1]]))))
ggplot(plot_df, aes(x = Pearson, fill = Condition, color = Condition)) + geom_histogram(position="identity", alpha = 0.5, binwidth = 0.005) + xlim(-.2, .5) + ylim(0,100) + scale_color_manual(values=c("#9685be","#999999")) + scale_fill_manual(values=c("#9685be","#999999")) + labs(y = 'Number of lineages', x = 'Mean of pearson correlations per lineage', title = 'Example dabtramtocis plot')
dev.off()
rm(dabtramtocis,dabtramtocis_lin_pearson_list,dabtramtocis_lin_pearson_rand_list, wilcox_pval_dabtramtocis,plot_df)
# dabtramtococl2 ----
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/dabtramtococl2_pearson_sim_results.RData')
wilcox_pvals_df$DabTramtoCoCl2 <- as.numeric(wilcox_pval_dabtramtococl2)
weighted_mean_pearson_dabtramtococl2 <- weighted.mean(unlist(lapply(dabtramtococl2_lin_pearson_list, mean)),
unlist(lapply(dabtramtococl2_lin_pearson_list, length))) # true weighted mean of average correlations per lineage
weighted_means_pearson_dabtramtococl2_sim <- sapply(1:length(dabtramtococl2_lin_pearson_rand_list), function(y)
weighted.mean(unlist(lapply(dabtramtococl2_lin_pearson_rand_list[[y]], mean)),
unlist(lapply(dabtramtococl2_lin_pearson_rand_list[[y]], length)))) # List of weighted means of pearson correlations
pdf('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/pearson_hist_plots/dabtramtococl2.pdf')
#Summary of all simulations
ggplot(data = as.data.frame(weighted_means_pearson_dabtramtococl2_sim), aes(x = weighted_means_pearson_dabtramtococl2_sim))+geom_histogram(fill = '#a2248e', color = 'black', binwidth = 0.005) + geom_vline(xintercept = weighted_mean_pearson_dabtramtococl2, linetype = 'dashed', color = 'red') + geom_text(aes(x = weighted_mean_pearson_dabtramtococl2+.03, label = round(weighted_mean_pearson_dabtramtococl2, 3), y = 50), color = 'red') + labs(y = 'Number of simulations', x = 'Average weighted mean of correlations of cells within a lineage across 100 simulations', title = 'DabTramtoCoCl2 simulation summary') + xlim(-.2, .2) + ylim(0,100) + geom_text(aes(x = weighted_mean_pearson_dabtramtococl2+.03, label = 'Observed', y = 70), color = 'red') + theme_classic()
# One example simulation
plot_df <- data.frame('Pearson' = c(unlist(lapply(dabtramtococl2_lin_pearson_list, mean)),unlist(lapply(dabtramtococl2_lin_pearson_rand_list[[1]], mean))), 'Condition' = c(rep('Data',length(dabtramtococl2_lin_pearson_list)),rep('Simulation', length(dabtramtococl2_lin_pearson_rand_list[[1]]))))
ggplot(plot_df, aes(x = Pearson, fill = Condition, color = Condition)) + geom_histogram(position="identity", alpha = 0.5, binwidth = 0.005) + xlim(-.2, .5) + ylim(0,100) + scale_color_manual(values=c("#a2248e","#999999")) + scale_fill_manual(values=c("#a2248e","#999999")) + labs(y = 'Number of lineages', x = 'Mean of pearson correlations per lineage', title = 'Example DabTramtoCoCl2 plot')
dev.off()
rm(dabtramtococl2,dabtramtococl2_lin_pearson_list,dabtramtococl2_lin_pearson_rand_list, wilcox_pval_dabtramtococl2,plot_df)
# cocl2 ----
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/cocl2_pearson_sim_results.RData')
wilcox_pvals_df$CoCl2 <- as.numeric(wilcox_pval_cocl2)
weighted_mean_pearson_cocl2 <- weighted.mean(unlist(lapply(cocl2_lin_pearson_list, mean)),
unlist(lapply(cocl2_lin_pearson_list, length))) # true weighted mean of average correlations per lineage
weighted_means_pearson_cocl2_sim <- sapply(1:length(cocl2_lin_pearson_rand_list), function(y)
weighted.mean(unlist(lapply(cocl2_lin_pearson_rand_list[[y]], mean)),
unlist(lapply(cocl2_lin_pearson_rand_list[[y]], length)))) # List of weighted means of pearson correlations
pdf('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/pearson_hist_plots/cocl2.pdf')
#Summary of all simulations
ggplot(data = as.data.frame(weighted_means_pearson_cocl2_sim), aes(x = weighted_means_pearson_cocl2_sim))+geom_histogram(fill = '#0f8241', color = 'black', binwidth = 0.005) + geom_vline(xintercept = weighted_mean_pearson_cocl2, linetype = 'dashed', color = 'red') + geom_text(aes(x = weighted_mean_pearson_cocl2+.03, label = round(weighted_mean_pearson_cocl2, 3), y = 50), color = 'red') + labs(y = 'Number of simulations', x = 'Average weighted mean of correlations of cells within a lineage across 100 simulations', title = 'CoCl2 simulation summary') + xlim(-.2, .2) + ylim(0,100) + geom_text(aes(x = weighted_mean_pearson_cocl2+.03, label = 'Observed', y = 70), color = 'red') + theme_classic()
# One example simulation
plot_df <- data.frame('Pearson' = c(unlist(lapply(cocl2_lin_pearson_list, mean)),unlist(lapply(cocl2_lin_pearson_rand_list[[1]], mean))), 'Condition' = c(rep('Data',length(cocl2_lin_pearson_list)),rep('Simulation', length(cocl2_lin_pearson_rand_list[[1]]))))
ggplot(plot_df, aes(x = Pearson, fill = Condition, color = Condition)) + geom_histogram(position="identity", alpha = 0.5, binwidth = 0.005) + xlim(-.2, .5) + ylim(0,100) + scale_color_manual(values=c("#0f8241","#999999")) + scale_fill_manual(values=c("#0f8241","#999999")) + labs(y = 'Number of lineages', x = 'Mean of pearson correlations per lineage', title = 'Example cocl2 plot')
dev.off()
rm(cocl2,cocl2_lin_pearson_list,cocl2_lin_pearson_rand_list, wilcox_pval_cocl2,plot_df)
# cocl2tococl2 ----
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/cocl2tococl2_pearson_sim_results.RData')
wilcox_pvals_df$CoCl2toCoCl2 <- as.numeric(wilcox_pval_cocl2tococl2)
weighted_mean_pearson_cocl2tococl2 <- weighted.mean(unlist(lapply(cocl2tococl2_lin_pearson_list, mean)),
unlist(lapply(cocl2tococl2_lin_pearson_list, length))) # true weighted mean of average correlations per lineage
weighted_means_pearson_cocl2tococl2_sim <- sapply(1:length(cocl2tococl2_lin_pearson_rand_list), function(y)
weighted.mean(unlist(lapply(cocl2tococl2_lin_pearson_rand_list[[y]], mean)),
unlist(lapply(cocl2tococl2_lin_pearson_rand_list[[y]], length)))) # List of weighted means of pearson correlations
pdf('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/pearson_hist_plots/cocl2tococl2.pdf')
#Summary of all simulations
ggplot(data = as.data.frame(weighted_means_pearson_cocl2tococl2_sim), aes(x = weighted_means_pearson_cocl2tococl2_sim))+geom_histogram(fill = '#6abd45', color = 'black', binwidth = 0.005) + geom_vline(xintercept = weighted_mean_pearson_cocl2tococl2, linetype = 'dashed', color = 'red') + geom_text(aes(x = weighted_mean_pearson_cocl2tococl2+.03, label = round(weighted_mean_pearson_cocl2tococl2, 3), y = 50), color = 'red') + labs(y = 'Number of simulations', x = 'Average weighted mean of correlations of cells within a lineage across 100 simulations', title = 'CoCl2toCoCl2 simulation summary') + xlim(-.2, .2) + ylim(0,100) + geom_text(aes(x = weighted_mean_pearson_cocl2tococl2+.03, label = 'Observed', y = 70), color = 'red') + theme_classic()
# One example simulation
plot_df <- data.frame('Pearson' = c(unlist(lapply(cocl2tococl2_lin_pearson_list, mean)),unlist(lapply(cocl2tococl2_lin_pearson_rand_list[[1]], mean))), 'Condition' = c(rep('Data',length(cocl2tococl2_lin_pearson_list)),rep('Simulation', length(cocl2tococl2_lin_pearson_rand_list[[1]]))))
ggplot(plot_df, aes(x = Pearson, fill = Condition, color = Condition)) + geom_histogram(position="identity", alpha = 0.5, binwidth = 0.005) + xlim(-.2, .5) + ylim(0,100) + scale_color_manual(values=c("#6abd45","#999999")) + scale_fill_manual(values=c("#6abd45","#999999")) + labs(y = 'Number of lineages', x = 'Mean of pearson correlations per lineage', title = 'Example CoCl2toCoCl2 plot')
dev.off()
rm(cocl2tococl2,cocl2tococl2_lin_pearson_list,cocl2tococl2_lin_pearson_rand_list, wilcox_pval_cocl2tococl2,plot_df)
# cocl2todabtram ----
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/cocl2todabtram_pearson_sim_results.RData')
wilcox_pvals_df$CoCl2toDabTram <- as.numeric(wilcox_pval_cocl2todabtram)
weighted_mean_pearson_cocl2todabtram <- weighted.mean(unlist(lapply(cocl2todabtram_lin_pearson_list, mean)),
unlist(lapply(cocl2todabtram_lin_pearson_list, length))) # true weighted mean of average correlations per lineage
weighted_means_pearson_cocl2todabtram_sim <- sapply(1:length(cocl2todabtram_lin_pearson_rand_list), function(y)
weighted.mean(unlist(lapply(cocl2todabtram_lin_pearson_rand_list[[y]], mean)),
unlist(lapply(cocl2todabtram_lin_pearson_rand_list[[y]], length)))) # List of weighted means of pearson correlations
pdf('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/pearson_hist_plots/cocl2todabtram.pdf')
#Summary of all simulations
ggplot(data = as.data.frame(weighted_means_pearson_cocl2todabtram_sim), aes(x = weighted_means_pearson_cocl2todabtram_sim))+geom_histogram(fill = '#10413b', color = 'black', binwidth = 0.005) + geom_vline(xintercept = weighted_mean_pearson_cocl2todabtram, linetype = 'dashed', color = 'red') + geom_text(aes(x = weighted_mean_pearson_cocl2todabtram+.03, label = round(weighted_mean_pearson_cocl2todabtram, 3), y = 50), color = 'red') + labs(y = 'Number of simulations', x = 'Average weighted mean of correlations of cells within a lineage across 100 simulations', title = 'CoCl2toDabTram simulation summary') + xlim(-.2, .2) + ylim(0,100) + geom_text(aes(x = weighted_mean_pearson_cocl2todabtram+.03, label = 'Observed', y = 70), color = 'red') + theme_classic()
# One example simulation
plot_df <- data.frame('Pearson' = c(unlist(lapply(cocl2todabtram_lin_pearson_list, mean)),unlist(lapply(cocl2todabtram_lin_pearson_rand_list[[1]], mean))), 'Condition' = c(rep('Data',length(cocl2todabtram_lin_pearson_list)),rep('Simulation', length(cocl2todabtram_lin_pearson_rand_list[[1]]))))
ggplot(plot_df, aes(x = Pearson, fill = Condition, color = Condition)) + geom_histogram(position="identity", alpha = 0.5, binwidth = 0.005) + xlim(-.2, .5) + ylim(0,100) + scale_color_manual(values=c("#10413b","#999999")) + scale_fill_manual(values=c("#10413b","#999999")) + labs(y = 'Number of lineages', x = 'Mean of pearson correlations per lineage', title = 'Example CoCl2toDabTram plot')
dev.off()
rm(cocl2todabtram,cocl2todabtram_lin_pearson_list,cocl2todabtram_lin_pearson_rand_list, wilcox_pval_cocl2todabtram,plot_df)
# cocl2tocis ----
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/cocl2tocis_pearson_sim_results.RData')
wilcox_pvals_df$CoCl2toCis <- as.numeric(wilcox_pval_cocl2tocis)
weighted_mean_pearson_cocl2tocis <- weighted.mean(unlist(lapply(cocl2tocis_lin_pearson_list, mean)),
unlist(lapply(cocl2tocis_lin_pearson_list, length))) # true weighted mean of average correlations per lineage
weighted_means_pearson_cocl2tocis_sim <- sapply(1:length(cocl2tocis_lin_pearson_rand_list), function(y)
weighted.mean(unlist(lapply(cocl2tocis_lin_pearson_rand_list[[y]], mean)),
unlist(lapply(cocl2tocis_lin_pearson_rand_list[[y]], length)))) # List of weighted means of pearson correlations
pdf('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/pearson_hist_plots/cocl2tocis.pdf')
#Summary of all simulations
ggplot(data = as.data.frame(weighted_means_pearson_cocl2tocis_sim), aes(x = weighted_means_pearson_cocl2tocis_sim))+geom_histogram(fill = '#6dc49c', color = 'black', binwidth = 0.005) + geom_vline(xintercept = weighted_mean_pearson_cocl2tocis, linetype = 'dashed', color = 'red') + geom_text(aes(x = weighted_mean_pearson_cocl2tocis+.03, label = round(weighted_mean_pearson_cocl2tocis, 3), y = 50), color = 'red') + labs(y = 'Number of simulations', x = 'Average weighted mean of correlations of cells within a lineage across 100 simulations', title = 'CoCl2toCis simulation summary') + xlim(-.2, .2) + ylim(0,100) + geom_text(aes(x = weighted_mean_pearson_cocl2tocis+.03, label = 'Observed', y = 70), color = 'red') + theme_classic()
# One example simulation
plot_df <- data.frame('Pearson' = c(unlist(lapply(cocl2tocis_lin_pearson_list, mean)),unlist(lapply(cocl2tocis_lin_pearson_rand_list[[1]], mean))), 'Condition' = c(rep('Data',length(cocl2tocis_lin_pearson_list)),rep('Simulation', length(cocl2tocis_lin_pearson_rand_list[[1]]))))
ggplot(plot_df, aes(x = Pearson, fill = Condition, color = Condition)) + geom_histogram(position="identity", alpha = 0.5, binwidth = 0.005) + xlim(-.2, .5) + ylim(0,100) + scale_color_manual(values=c("#6dc49c","#999999")) + scale_fill_manual(values=c("#6dc49c","#999999")) + labs(y = 'Number of lineages', x = 'Mean of pearson correlations per lineage', title = 'Example CoCl2toCis plot')
dev.off()
rm(cocl2tocis,cocl2tocis_lin_pearson_list,cocl2tocis_lin_pearson_rand_list, wilcox_pval_cocl2tocis,plot_df)
# cis ----
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/cis_pearson_sim_results.RData')
wilcox_pvals_df$Cis <- as.numeric(wilcox_pval_cis)
weighted_mean_pearson_cis <- weighted.mean(unlist(lapply(cis_lin_pearson_list, mean)),
unlist(lapply(cis_lin_pearson_list, length))) # true weighted mean of average correlations per lineage
weighted_means_pearson_cis_sim <- sapply(1:length(cis_lin_pearson_rand_list), function(y)
weighted.mean(unlist(lapply(cis_lin_pearson_rand_list[[y]], mean)),
unlist(lapply(cis_lin_pearson_rand_list[[y]], length)))) # List of weighted means of pearson correlations
pdf('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/pearson_hist_plots/cis.pdf')
#Summary of all simulations
ggplot(data = as.data.frame(weighted_means_pearson_cis_sim), aes(x = weighted_means_pearson_cis_sim))+geom_histogram(fill = '#c96d29', color = 'black', binwidth = 0.005) + geom_vline(xintercept = weighted_mean_pearson_cis, linetype = 'dashed', color = 'red') + geom_text(aes(x = weighted_mean_pearson_cis+.03, label = round(weighted_mean_pearson_cis, 3), y = 50), color = 'red') + labs(y = 'Number of simulations', x = 'Average weighted mean of correlations of cells within a lineage across 100 simulations', title = 'Cis simulation summary') + xlim(-.2, .2) + ylim(0,100) + geom_text(aes(x = weighted_mean_pearson_cis+.03, label = 'Observed', y = 70), color = 'red') + theme_classic()
# One example simulation
plot_df <- data.frame('Pearson' = c(unlist(lapply(cis_lin_pearson_list, mean)),unlist(lapply(cis_lin_pearson_rand_list[[1]], mean))), 'Condition' = c(rep('Data',length(cis_lin_pearson_list)),rep('Simulation', length(cis_lin_pearson_rand_list[[1]]))))
ggplot(plot_df, aes(x = Pearson, fill = Condition, color = Condition)) + geom_histogram(position="identity", alpha = 0.5, binwidth = 0.005) + xlim(-.2, .5) + ylim(0,100) + scale_color_manual(values=c("#c96d29","#999999")) + scale_fill_manual(values=c("#c96d29","#999999")) + labs(y = 'Number of lineages', x = 'Mean of pearson correlations per lineage', title = 'Example cis plot')
dev.off()
rm(cis,cis_lin_pearson_list,cis_lin_pearson_rand_list, wilcox_pval_cis,plot_df)
# cistococl2 ----
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/cistococl2_pearson_sim_results.RData')
wilcox_pvals_df$CistoCoCl2 <- as.numeric(wilcox_pval_cistococl2)
weighted_mean_pearson_cistococl2 <- weighted.mean(unlist(lapply(cistococl2_lin_pearson_list, mean)),
unlist(lapply(cistococl2_lin_pearson_list, length))) # true weighted mean of average correlations per lineage
weighted_means_pearson_cistococl2_sim <- sapply(1:length(cistococl2_lin_pearson_rand_list), function(y)
weighted.mean(unlist(lapply(cistococl2_lin_pearson_rand_list[[y]], mean)),
unlist(lapply(cistococl2_lin_pearson_rand_list[[y]], length)))) # List of weighted means of pearson correlations
pdf('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/pearson_hist_plots/cistococl2.pdf')
#Summary of all simulations
ggplot(data = as.data.frame(weighted_means_pearson_cistococl2_sim), aes(x = weighted_means_pearson_cistococl2_sim))+geom_histogram(fill = '#f49129', color = 'black', binwidth = 0.005) + geom_vline(xintercept = weighted_mean_pearson_cistococl2, linetype = 'dashed', color = 'red') + geom_text(aes(x = weighted_mean_pearson_cistococl2+.03, label = round(weighted_mean_pearson_cistococl2, 3), y = 50), color = 'red') + labs(y = 'Number of simulations', x = 'Average weighted mean of correlations of cells within a lineage across 100 simulations', title = 'CistoCoCl2 simulation summary') + xlim(-.2, .2) + ylim(0,100) + geom_text(aes(x = weighted_mean_pearson_cistococl2+.03, label = 'Observed', y = 70), color = 'red') + theme_classic()
# One example simulation
plot_df <- data.frame('Pearson' = c(unlist(lapply(cistococl2_lin_pearson_list, mean)),unlist(lapply(cistococl2_lin_pearson_rand_list[[1]], mean))), 'Condition' = c(rep('Data',length(cistococl2_lin_pearson_list)),rep('Simulation', length(cistococl2_lin_pearson_rand_list[[1]]))))
ggplot(plot_df, aes(x = Pearson, fill = Condition, color = Condition)) + geom_histogram(position="identity", alpha = 0.5, binwidth = 0.005) + xlim(-.2, .5) + ylim(0,100) + scale_color_manual(values=c("#f49129","#999999")) + scale_fill_manual(values=c("#f49129","#999999")) + labs(y = 'Number of lineages', x = 'Mean of pearson correlations per lineage', title = 'Example CistoCoCl2 plot')
dev.off()
rm(cistococl2,cistococl2_lin_pearson_list,cistococl2_lin_pearson_rand_list, wilcox_pval_cistococl2,plot_df)
# cistodabtram ----
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/cistodabtram_pearson_sim_results.RData')
wilcox_pvals_df$CistoDabTram <- as.numeric(wilcox_pval_cistodabtram)
weighted_mean_pearson_cistodabtram <- weighted.mean(unlist(lapply(cistodabtram_lin_pearson_list, mean)),
unlist(lapply(cistodabtram_lin_pearson_list, length))) # true weighted mean of average correlations per lineage
weighted_means_pearson_cistodabtram_sim <- sapply(1:length(cistodabtram_lin_pearson_rand_list), function(y)
weighted.mean(unlist(lapply(cistodabtram_lin_pearson_rand_list[[y]], mean)),
unlist(lapply(cistodabtram_lin_pearson_rand_list[[y]], length)))) # List of weighted means of pearson correlations
pdf('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/pearson_hist_plots/cistodabtram.pdf')
#Summary of all simulations
ggplot(data = as.data.frame(weighted_means_pearson_cistodabtram_sim), aes(x = weighted_means_pearson_cistodabtram_sim))+geom_histogram(fill = '#a23622', color = 'black', binwidth = 0.005) + geom_vline(xintercept = weighted_mean_pearson_cistodabtram, linetype = 'dashed', color = 'red') + geom_text(aes(x = weighted_mean_pearson_cistodabtram+.03, label = round(weighted_mean_pearson_cistodabtram, 3), y = 50), color = 'red') + labs(y = 'Number of simulations', x = 'Average weighted mean of correlations of cells within a lineage across 100 simulations', title = 'CistoDabTram simulation summary') + xlim(-.2, .2) + ylim(0,100) + geom_text(aes(x = weighted_mean_pearson_cistodabtram+.03, label = 'Observed', y = 70), color = 'red') + theme_classic()
# One example simulation
plot_df <- data.frame('Pearson' = c(unlist(lapply(cistodabtram_lin_pearson_list, mean)),unlist(lapply(cistodabtram_lin_pearson_rand_list[[1]], mean))), 'Condition' = c(rep('Data',length(cistodabtram_lin_pearson_list)),rep('Simulation', length(cistodabtram_lin_pearson_rand_list[[1]]))))
ggplot(plot_df, aes(x = Pearson, fill = Condition, color = Condition)) + geom_histogram(position="identity", alpha = 0.5, binwidth = 0.005) + xlim(-.2, .5) + ylim(0,100) + scale_color_manual(values=c("#a23622","#999999")) + scale_fill_manual(values=c("#a23622","#999999")) + labs(y = 'Number of lineages', x = 'Mean of pearson correlations per lineage', title = 'Example CistoDabTram plot')
dev.off()
rm(cistodabtram,cistodabtram_lin_pearson_list,cistodabtram_lin_pearson_rand_list, wilcox_pval_cistodabtram,plot_df)
# cistocis ----
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/cistocis_pearson_sim_results.RData')
wilcox_pvals_df$CistoCis <- as.numeric(wilcox_pval_cistocis)
weighted_mean_pearson_cistocis <- weighted.mean(unlist(lapply(cistocis_lin_pearson_list, mean)),
unlist(lapply(cistocis_lin_pearson_list, length))) # true weighted mean of average correlations per lineage
weighted_means_pearson_cistocis_sim <- sapply(1:length(cistocis_lin_pearson_rand_list), function(y)
weighted.mean(unlist(lapply(cistocis_lin_pearson_rand_list[[y]], mean)),
unlist(lapply(cistocis_lin_pearson_rand_list[[y]], length)))) # List of weighted means of pearson correlations
pdf('2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/pearson_hist_plots/cistocis.pdf')
#Summary of all simulations
ggplot(data = as.data.frame(weighted_means_pearson_cistocis_sim), aes(x = weighted_means_pearson_cistocis_sim))+geom_histogram(fill = '#fbd08c', color = 'black', binwidth = 0.005) + geom_vline(xintercept = weighted_mean_pearson_cistocis, linetype = 'dashed', color = 'red') + geom_text(aes(x = weighted_mean_pearson_cistocis+.03, label = round(weighted_mean_pearson_cistocis, 3), y = 50), color = 'red') + labs(y = 'Number of simulations', x = 'Average weighted mean of correlations of cells within a lineage across 100 simulations', title = 'CistoCis simulation summary') + xlim(-.2, .2) + ylim(0,100) + geom_text(aes(x = weighted_mean_pearson_cistocis+.03, label = 'Observed', y = 70), color = 'red') + theme_classic()
# One example simulation
plot_df <- data.frame('Pearson' = c(unlist(lapply(cistocis_lin_pearson_list, mean)),unlist(lapply(cistocis_lin_pearson_rand_list[[1]], mean))), 'Condition' = c(rep('Data',length(cistocis_lin_pearson_list)),rep('Simulation', length(cistocis_lin_pearson_rand_list[[1]]))))
ggplot(plot_df, aes(x = Pearson, fill = Condition, color = Condition)) + geom_histogram(position="identity", alpha = 0.5, binwidth = 0.005) + xlim(-.2, .5) + ylim(0,100) + scale_color_manual(values=c("#fdb08c","#999999")) + scale_fill_manual(values=c("#fbd08c","#999999")) + labs(y = 'Number of lineages', x = 'Mean of pearson correlations per lineage', title = 'Example CistoCis plot')
dev.off()
rm(cistocis,cistocis_lin_pearson_list,cistocis_lin_pearson_rand_list, wilcox_pval_cistocis,plot_df)
write.csv(wilcox_pvals_df, '2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/wilcox_pvals_sims.csv')
num_sig_sims <- as.data.frame(sapply(as.data.frame(wilcox_pvals_df <0.05), sum))
colnames(num_sig_sims) <- "Number_significant_simulations"
write.csv(num_sig_sims, '2022_01_14_analysis_scripts/2022_05_27_analysis/Lineage_expression/wilcox_pvals_sims_numsig.csv')
colnames(num_sig_sims) <- "Number_significant_simulations"
Error in `colnames<-`(`*tmp*`, value = "Number_significant_simulations") :
attempt to set 'colnames' on an object with less than two dimensions